Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
include/linux/mm.h at commit b4672d37293cb045ec4d57e8b76a62810c96da71 (1054 lines, 36 kB)
#ifndef _LINUX_MM_H
#define _LINUX_MM_H

#include <linux/sched.h>
#include <linux/errno.h>

#ifdef __KERNEL__

#include <linux/config.h>
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mmzone.h>
#include <linux/rbtree.h>
#include <linux/prio_tree.h>
#include <linux/fs.h>
#include <linux/mutex.h>

struct mempolicy;
struct anon_vma;

#ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;
#endif

extern unsigned long num_physpages;
extern void * high_memory;
extern unsigned long vmalloc_earlyreserve;
extern int page_cluster;

#ifdef CONFIG_SYSCTL
extern int sysctl_legacy_va_layout;
#else
#define sysctl_legacy_va_layout 0
#endif

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/atomic.h>

#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))

/*
 * Linux kernel virtual memory manager primitives.
 * The idea being to have a "virtual" mm in the same way
 * we have a virtual fs - giving a cleaner interface to the
 * mm details, and allowing different kinds of memory mappings
 * (from shared memory to executable loading to arbitrary
 * mmap() functions).
 */

/*
 * This struct defines a memory VMM memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
        struct mm_struct * vm_mm;       /* The address space we belong to. */
        unsigned long vm_start;         /* Our start address within vm_mm. */
        unsigned long vm_end;           /* The first byte after our end address
                                           within vm_mm. */

        /* linked list of VM areas per task, sorted by address */
        struct vm_area_struct *vm_next;

        pgprot_t vm_page_prot;          /* Access permissions of this VMA. */
        unsigned long vm_flags;         /* Flags, listed below. */

        struct rb_node vm_rb;

        /*
         * For areas with an address space and backing store,
         * linkage into the address_space->i_mmap prio tree, or
         * linkage to the list of like vmas hanging off its node, or
         * linkage of vma in the address_space->i_mmap_nonlinear list.
         */
        union {
                struct {
                        struct list_head list;
                        void *parent;   /* aligns with prio_tree_node parent */
                        struct vm_area_struct *head;
                } vm_set;

                struct raw_prio_tree_node prio_tree_node;
        } shared;

        /*
         * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
         * list, after a COW of one of the file pages.  A MAP_SHARED vma
         * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
         * or brk vma (with NULL file) can only be in an anon_vma list.
         */
        struct list_head anon_vma_node; /* Serialized by anon_vma->lock */
        struct anon_vma *anon_vma;      /* Serialized by page_table_lock */

        /* Function pointers to deal with this struct. */
        struct vm_operations_struct * vm_ops;

        /* Information about our backing store: */
        unsigned long vm_pgoff;         /* Offset (within vm_file) in PAGE_SIZE
                                           units, *not* PAGE_CACHE_SIZE */
        struct file * vm_file;          /* File we map to (can be NULL). */
        void * vm_private_data;         /* was vm_pte (shared mem) */
        unsigned long vm_truncate_count;/* truncate_count or restart_addr */

#ifndef CONFIG_MMU
        atomic_t vm_usage;              /* refcount (VMAs shared if !MMU) */
#endif
#ifdef CONFIG_NUMA
        struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
#endif
};

/*
 * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
 * disabled, then there's a single shared list of VMAs maintained by the
 * system, and mm's subscribe to these individually
 */
struct vm_list_struct {
        struct vm_list_struct   *next;
        struct vm_area_struct   *vma;
};

#ifndef CONFIG_MMU
extern struct rb_root nommu_vma_tree;
extern struct rw_semaphore nommu_vma_sem;

extern unsigned int kobjsize(const void *objp);
#endif

/*
 * vm_flags..
 */
#define VM_READ         0x00000001      /* currently active flags */
#define VM_WRITE        0x00000002
#define VM_EXEC         0x00000004
#define VM_SHARED       0x00000008

/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
#define VM_MAYREAD      0x00000010      /* limits for mprotect() etc */
#define VM_MAYWRITE     0x00000020
#define VM_MAYEXEC      0x00000040
#define VM_MAYSHARE     0x00000080

#define VM_GROWSDOWN    0x00000100      /* general info on the segment */
#define VM_GROWSUP      0x00000200
#define VM_SHM          0x00000000      /* Means nothing: delete it later */
#define VM_PFNMAP       0x00000400      /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE    0x00000800      /* ETXTBSY on write attempts.. */

#define VM_EXECUTABLE   0x00001000
#define VM_LOCKED       0x00002000
#define VM_IO           0x00004000      /* Memory mapped I/O or similar */

                                        /* Used by sys_madvise() */
#define VM_SEQ_READ     0x00008000      /* App will access data sequentially */
#define VM_RAND_READ    0x00010000      /* App will not benefit from clustered reads */

#define VM_DONTCOPY     0x00020000      /* Do not copy this vma on fork */
#define VM_DONTEXPAND   0x00040000      /* Cannot expand with mremap() */
#define VM_RESERVED     0x00080000      /* Count as reserved_vm like IO */
#define VM_ACCOUNT      0x00100000      /* Is a VM accounted object */
#define VM_HUGETLB      0x00400000      /* Huge TLB Page VM */
#define VM_NONLINEAR    0x00800000      /* Is non-linear (remap_file_pages) */
#define VM_MAPPED_COPY  0x01000000      /* T if mapped copy of data (nommu mmap) */
#define VM_INSERTPAGE   0x02000000      /* The vma has had "vm_insert_page()" done on it */

#ifndef VM_STACK_DEFAULT_FLAGS          /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif

#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK_FLAGS  (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
#else
#define VM_STACK_FLAGS  (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
#endif

#define VM_READHINTMASK                 (VM_SEQ_READ | VM_RAND_READ)
#define VM_ClearReadHint(v)             (v)->vm_flags &= ~VM_READHINTMASK
#define VM_NormalReadHint(v)            (!((v)->vm_flags & VM_READHINTMASK))
#define VM_SequentialReadHint(v)        ((v)->vm_flags & VM_SEQ_READ)
#define VM_RandomReadHint(v)            ((v)->vm_flags & VM_RAND_READ)

/*
 * mapping from the currently active vm_flags protection bits (the
 * low four bits) to a page protection mask..
 */
extern pgprot_t protection_map[16];


/*
 * These are the virtual MM functions - opening of an area, closing and
 * unmapping it (needed to keep files on disk up-to-date etc), pointer
 * to the functions called when a no-page or a wp-page exception occurs.
 */
struct vm_operations_struct {
        void (*open)(struct vm_area_struct * area);
        void (*close)(struct vm_area_struct * area);
        struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
        int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
#ifdef CONFIG_NUMA
        int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
        struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
                                        unsigned long addr);
#endif
};

struct mmu_gather;
struct inode;

/*
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page.
 */
struct page {
        unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
        atomic_t _count;                /* Usage count, see below. */
        atomic_t _mapcount;             /* Count of ptes mapped in mms,
                                         * to show when page is mapped
                                         * & limit reverse map searches.
                                         */
        union {
            struct {
                unsigned long private;          /* Mapping-private opaque data:
                                                 * usually used for buffer_heads
                                                 * if PagePrivate set; used for
                                                 * swp_entry_t if PageSwapCache.
                                                 * When page is free, this
                                                 * indicates order in the buddy
                                                 * system.
                                                 */
                struct address_space *mapping;  /* If low bit clear, points to
                                                 * inode address_space, or NULL.
                                                 * If page mapped as anonymous
                                                 * memory, low bit is set, and
                                                 * it points to anon_vma object:
                                                 * see PAGE_MAPPING_ANON below.
                                                 */
            };
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
            spinlock_t ptl;
#endif
        };
        pgoff_t index;                  /* Our offset within mapping. */
        struct list_head lru;           /* Pageout list, eg. active_list
                                         * protected by zone->lru_lock !
                                         */
        /*
         * On machines where all RAM is mapped into kernel address space,
         * we can simply calculate the virtual address. On machines with
         * highmem some memory is mapped into kernel virtual memory
         * dynamically, so we need a place to store that address.
         * Note that this field could be 16 bits on x86 ... ;)
         *
         * Architectures with slow multiplication can define
         * WANT_PAGE_VIRTUAL in asm/page.h
         */
#if defined(WANT_PAGE_VIRTUAL)
        void *virtual;                  /* Kernel virtual address (NULL if
                                           not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
};

#define page_private(page)              ((page)->private)
#define set_page_private(page, v)       ((page)->private = (v))

/*
 * FIXME: take this include out, include page-flags.h in
 * files which need it (119 of them)
 */
#include <linux/page-flags.h>

/*
 * Methods to modify the page usage count.
 *
 * What counts for a page usage:
 * - cache mapping   (page->mapping)
 * - private data    (page->private)
 * - page mapped in a task's page tables, each mapping
 *   is counted separately
 *
 * Also, many kernel routines increase the page count before a critical
 * routine so they can be sure the page doesn't go away from under them.
 *
 * Since 2.6.6 (approx), a free page has ->_count = -1.  This is so that we
 * can use atomic_add_negative(-1, page->_count) to detect when the page
 * becomes free and so that we can also use atomic_inc_and_test to atomically
 * detect when we just tried to grab a ref on a page which some other CPU has
 * already deemed to be freeable.
 *
 * NO code should make assumptions about this internal detail! Use the provided
 * macros which retain the old rules: page_count(page) == 0 is a free page.
 */

/*
 * Drop a ref, return true if the logical refcount fell to zero (the page has
 * no users)
 */
#define put_page_testzero(p)                            \
        ({                                              \
                BUG_ON(page_count(p) == 0);             \
                atomic_add_negative(-1, &(p)->_count);  \
        })

/*
 * Grab a ref, return true if the page previously had a logical refcount of
 * zero.  ie: returns true if we just grabbed an already-deemed-to-be-free page
 */
#define get_page_testone(p)     atomic_inc_and_test(&(p)->_count)

#define set_page_count(p,v)     atomic_set(&(p)->_count, (v) - 1)
#define __put_page(p)           atomic_dec(&(p)->_count)

extern void FASTCALL(__page_cache_release(struct page *));

static inline int page_count(struct page *page)
{
        if (PageCompound(page))
                page = (struct page *)page_private(page);
        return atomic_read(&page->_count) + 1;
}

static inline void get_page(struct page *page)
{
        if (unlikely(PageCompound(page)))
                page = (struct page *)page_private(page);
        atomic_inc(&page->_count);
}

void put_page(struct page *page);

/*
 * Multiple processes may "see" the same page. E.g. for untouched
 * mappings of /dev/null, all processes see the same page full of
 * zeroes, and text pages of executables and shared libraries have
 * only one copy in memory, at most, normally.
 *
 * For the non-reserved pages, page_count(page) denotes a reference count.
 *   page_count() == 0 means the page is free. page->lru is then used for
 *   freelist management in the buddy allocator.
 *   page_count() == 1 means the page is used for exactly one purpose
 *   (e.g. a private data page of one process).
 *
 * A page may be used for kmalloc() or anyone else who does a
 * __get_free_page(). In this case the page_count() is at least 1, and
 * all other fields are unused but should be 0 or NULL. The
 * management of this page is the responsibility of the one who uses
 * it.
 *
 * The other pages (we may call them "process pages") are completely
 * managed by the Linux memory manager: I/O, buffers, swapping etc.
 * The following discussion applies only to them.
 *
 * A page may belong to an inode's memory mapping. In this case,
 * page->mapping is the pointer to the inode, and page->index is the
 * file offset of the page, in units of PAGE_CACHE_SIZE.
 *
 * A page contains an opaque `private' member, which belongs to the
 * page's address_space. Usually, this is the address of a circular
 * list of the page's disk buffers.
 *
 * For pages belonging to inodes, the page_count() is the number of
 * attaches, plus 1 if `private' contains something, plus one for
 * the page cache itself.
 *
 * Instead of keeping dirty/clean pages in per address-space lists, we instead
 * now tag pages as dirty/under writeback in the radix tree.
 *
 * There is also a per-mapping radix tree mapping index to the page
 * in memory if present. The tree is rooted at mapping->root.
 *
 * All process pages can do I/O:
 * - inode pages may need to be read from disk,
 * - inode pages which have been modified and are MAP_SHARED may need
 *   to be written to disk,
 * - private pages which have been modified may need to be swapped out
 *   to swap space and (later) to be read back into memory.
 */

/*
 * The zone field is never updated after free_area_init_core()
 * sets it, so none of the operations on it need to be atomic.
 */


/*
 * page->flags layout:
 *
 * There are three possibilities for how page->flags get
 * laid out.  The first is for the normal case, without
 * sparsemem.  The second is for sparsemem when there is
 * plenty of space for node and section.  The last is when
 * we have run out of space and have to fall back to an
 * alternate (slower) way of determining the node.
 *
 *        No sparsemem: |       NODE     | ZONE | ... | FLAGS |
 * with space for node: | SECTION | NODE | ZONE | ... | FLAGS |
 *   no space for node: | SECTION |     ZONE    | ... | FLAGS |
 */
#ifdef CONFIG_SPARSEMEM
#define SECTIONS_WIDTH          SECTIONS_SHIFT
#else
#define SECTIONS_WIDTH          0
#endif

#define ZONES_WIDTH             ZONES_SHIFT

#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED
#define NODES_WIDTH             NODES_SHIFT
#else
#define NODES_WIDTH             0
#endif

/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
#define SECTIONS_PGOFF          ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
#define NODES_PGOFF             (SECTIONS_PGOFF - NODES_WIDTH)
#define ZONES_PGOFF             (NODES_PGOFF - ZONES_WIDTH)

/*
 * We are going to use the flags for the page to node mapping if its in
 * there.  This includes the case where there is no node, so it is implicit.
 */
#define FLAGS_HAS_NODE          (NODES_WIDTH > 0 || NODES_SHIFT == 0)

#ifndef PFN_SECTION_SHIFT
#define PFN_SECTION_SHIFT 0
#endif

/*
 * Define the bit shifts to access each section.  For non-existant
 * sections we define the shift as 0; that plus a 0 mask ensures
 * the compiler will optimise away reference to them.
 */
#define SECTIONS_PGSHIFT        (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
#define NODES_PGSHIFT           (NODES_PGOFF * (NODES_WIDTH != 0))
#define ZONES_PGSHIFT           (ZONES_PGOFF * (ZONES_WIDTH != 0))

/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */
#if FLAGS_HAS_NODE
#define ZONETABLE_SHIFT         (NODES_SHIFT + ZONES_SHIFT)
#else
#define ZONETABLE_SHIFT         (SECTIONS_SHIFT + ZONES_SHIFT)
#endif
#define ZONETABLE_PGSHIFT       ZONES_PGSHIFT

#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#endif

#define ZONES_MASK              ((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK              ((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK           ((1UL << SECTIONS_WIDTH) - 1)
#define ZONETABLE_MASK          ((1UL << ZONETABLE_SHIFT) - 1)

static inline unsigned long page_zonenum(struct page *page)
{
        return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}

struct zone;
extern struct zone *zone_table[];

static inline struct zone *page_zone(struct page *page)
{
        return zone_table[(page->flags >> ZONETABLE_PGSHIFT) &
                        ZONETABLE_MASK];
}

static inline unsigned long page_to_nid(struct page *page)
{
        if (FLAGS_HAS_NODE)
                return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
        else
                return page_zone(page)->zone_pgdat->node_id;
}
static inline unsigned long page_to_section(struct page *page)
{
        return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
}

static inline void set_page_zone(struct page *page, unsigned long zone)
{
        page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
        page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
}
static inline void set_page_node(struct page *page, unsigned long node)
{
        page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
        page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
}
static inline void set_page_section(struct page *page, unsigned long section)
{
        page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
        page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
}

static inline void set_page_links(struct page *page, unsigned long zone,
        unsigned long node, unsigned long pfn)
{
        set_page_zone(page, zone);
        set_page_node(page, node);
        set_page_section(page, pfn_to_section_nr(pfn));
}

#ifndef CONFIG_DISCONTIGMEM
/* The array of struct pages - for discontigmem use pgdat->lmem_map */
extern struct page *mem_map;
#endif

static inline void *lowmem_page_address(struct page *page)
{
        return __va(page_to_pfn(page) << PAGE_SHIFT);
}

#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif

#if defined(WANT_PAGE_VIRTUAL)
#define page_address(page) ((page)->virtual)
#define set_page_address(page, address)                 \
        do {                                            \
                (page)->virtual = (address);            \
        } while(0)
#define page_address_init()  do { } while(0)
#endif

#if defined(HASHED_PAGE_VIRTUAL)
void *page_address(struct page *page);
void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif

#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address)  do { } while(0)
#define page_address_init()  do { } while(0)
#endif

/*
 * On an anonymous page mapped into a user virtual memory area,
 * page->mapping points to its anon_vma, not to a struct address_space;
 * with the PAGE_MAPPING_ANON bit set to distinguish it.
 *
 * Please note that, confusingly, "page_mapping" refers to the inode
 * address_space which maps the page from disk; whereas "page_mapped"
 * refers to user virtual address space into which the page is mapped.
 */
#define PAGE_MAPPING_ANON       1

extern struct address_space swapper_space;
static inline struct address_space *page_mapping(struct page *page)
{
        struct address_space *mapping = page->mapping;

        if (unlikely(PageSwapCache(page)))
                mapping = &swapper_space;
        else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
                mapping = NULL;
        return mapping;
}

static inline int PageAnon(struct page *page)
{
        return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
}

/*
 * Return the pagecache index of the passed page.  Regular pagecache pages
 * use ->index whereas swapcache pages use ->private
 */
static inline pgoff_t page_index(struct page *page)
{
        if (unlikely(PageSwapCache(page)))
                return page_private(page);
        return page->index;
}

/*
 * The atomic page->_mapcount, like _count, starts from -1:
 * so that transitions both from it and to it can be tracked,
 * using atomic_inc_and_test and atomic_add_negative(-1).
 */
static inline void reset_page_mapcount(struct page *page)
{
        atomic_set(&(page)->_mapcount, -1);
}

static inline int page_mapcount(struct page *page)
{
        return atomic_read(&(page)->_mapcount) + 1;
}

/*
 * Return true if this page is mapped into pagetables.
 */
static inline int page_mapped(struct page *page)
{
        return atomic_read(&(page)->_mapcount) >= 0;
}

/*
 * Error return values for the *_nopage functions
 */
#define NOPAGE_SIGBUS   (NULL)
#define NOPAGE_OOM      ((struct page *) (-1))

/*
 * Different kinds of faults, as returned by handle_mm_fault().
 * Used to decide whether a process gets delivered SIGBUS or
 * just gets major/minor fault counters bumped up.
 */
#define VM_FAULT_OOM    0x00
#define VM_FAULT_SIGBUS 0x01
#define VM_FAULT_MINOR  0x02
#define VM_FAULT_MAJOR  0x03

/*
 * Special case for get_user_pages.
 * Must be in a distinct bit from the above VM_FAULT_ flags.
 */
#define VM_FAULT_WRITE  0x10

#define offset_in_page(p)       ((unsigned long)(p) & ~PAGE_MASK)

extern void show_free_areas(void);

#ifdef CONFIG_SHMEM
struct page *shmem_nopage(struct vm_area_struct *vma,
                        unsigned long address, int *type);
int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
                                        unsigned long addr);
int shmem_lock(struct file *file, int lock, struct user_struct *user);
#else
#define shmem_nopage filemap_nopage

static inline int shmem_lock(struct file *file, int lock,
                             struct user_struct *user)
{
        return 0;
}

static inline int shmem_set_policy(struct vm_area_struct *vma,
                                   struct mempolicy *new)
{
        return 0;
}

static inline struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
                                                 unsigned long addr)
{
        return NULL;
}
#endif
struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
extern int shmem_mmap(struct file *file, struct vm_area_struct *vma);

int shmem_zero_setup(struct vm_area_struct *);

#ifndef CONFIG_MMU
extern unsigned long shmem_get_unmapped_area(struct file *file,
                                             unsigned long addr,
                                             unsigned long len,
                                             unsigned long pgoff,
                                             unsigned long flags);
#endif

static inline int can_do_mlock(void)
{
        if (capable(CAP_IPC_LOCK))
                return 1;
        if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
                return 1;
        return 0;
}
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);

/*
 * Parameter block passed down to zap_pte_range in exceptional cases.
 */
struct zap_details {
        struct vm_area_struct *nonlinear_vma;   /* Check page->index if set */
        struct address_space *check_mapping;    /* Check page->mapping if set */
        pgoff_t first_index;                    /* Lowest page->index to unmap */
        pgoff_t last_index;                     /* Highest page->index to unmap */
        spinlock_t *i_mmap_lock;                /* For unmap_mapping_range: */
        unsigned long truncate_count;           /* Compare vm_truncate_count */
};

struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t);
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *);
unsigned long unmap_vmas(struct mmu_gather **tlb,
                struct vm_area_struct *start_vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *);
void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
                unsigned long end, unsigned long floor, unsigned long ceiling);
void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
                unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma);
int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
                        unsigned long size, pgprot_t prot);
void unmap_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen, int even_cows);

static inline void unmap_shared_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen)
{
        unmap_mapping_range(mapping, holebegin, holelen, 0);
}

extern int vmtruncate(struct inode * inode, loff_t offset);
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);

#ifdef CONFIG_MMU
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma,
                        unsigned long address, int write_access);

static inline int handle_mm_fault(struct mm_struct *mm,
                        struct vm_area_struct *vma, unsigned long address,
                        int write_access)
{
        return __handle_mm_fault(mm, vma, address, write_access) &
                                (~VM_FAULT_WRITE);
}
#else
static inline int handle_mm_fault(struct mm_struct *mm,
                        struct vm_area_struct *vma, unsigned long address,
                        int write_access)
{
        /* should never happen if there's no MMU */
        BUG();
        return VM_FAULT_SIGBUS;
}
#endif

extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);

int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
                int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);

int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
                                struct page *page);
int FASTCALL(set_page_dirty(struct page *page));
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);

extern unsigned long do_mremap(unsigned long addr,
                               unsigned long old_len, unsigned long new_len,
                               unsigned long flags, unsigned long new_addr);

/*
 * Prototype to add a shrinker callback for ageable caches.
 *
 * These functions are passed a count `nr_to_scan' and a gfpmask.  They should
 * scan `nr_to_scan' objects, attempting to free them.
 *
 * The callback must return the number of objects which remain in the cache.
 *
 * The callback will be passed nr_to_scan == 0 when the VM is querying the
 * cache size, so a fastpath for that case is appropriate.
 */
typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);

/*
 * Add an aging callback.  The int is the number of 'seeks' it takes
 * to recreate one of the objects that these functions age.
 */

#define DEFAULT_SEEKS 2
struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);

extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));

int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);

/*
 * The following ifdef needed to get the 4level-fixup.h header to work.
 * Remove it when 4level-fixup.h has been removed.
 */
#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
        return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
                NULL: pud_offset(pgd, address);
}

static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
        return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
                NULL: pmd_offset(pud, address);
}
#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
/*
 * We tuck a spinlock to guard each pagetable page into its struct page,
 * at page->private, with BUILD_BUG_ON to make sure that this will not
 * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
 * When freeing, reset page->mapping so free_pages_check won't complain.
 */
#define __pte_lockptr(page)     &((page)->ptl)
#define pte_lock_init(_page)    do {                                    \
        spin_lock_init(__pte_lockptr(_page));                           \
} while (0)
#define pte_lock_deinit(page)   ((page)->mapping = NULL)
#define pte_lockptr(mm, pmd)    ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
#else
/*
 * We use mm->page_table_lock to guard all pagetable pages of the mm.
 */
#define pte_lock_init(page)     do {} while (0)
#define pte_lock_deinit(page)   do {} while (0)
#define pte_lockptr(mm, pmd)    ({(void)(pmd); &(mm)->page_table_lock;})
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */

#define pte_offset_map_lock(mm, pmd, address, ptlp)     \
({                                                      \
        spinlock_t *__ptl = pte_lockptr(mm, pmd);       \
        pte_t *__pte = pte_offset_map(pmd, address);    \
        *(ptlp) = __ptl;                                \
        spin_lock(__ptl);                               \
        __pte;                                          \
})

#define pte_unmap_unlock(pte, ptl)      do {            \
        spin_unlock(ptl);                               \
        pte_unmap(pte);                                 \
} while (0)

#define pte_alloc_map(mm, pmd, address)                 \
        ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
                NULL: pte_offset_map(pmd, address))

#define pte_alloc_map_lock(mm, pmd, address, ptlp)      \
        ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
                NULL: pte_offset_map_lock(mm, pmd, address, ptlp))

#define pte_alloc_kernel(pmd, address)                  \
        ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
                NULL: pte_offset_kernel(pmd, address))

extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat,
        unsigned long * zones_size, unsigned long zone_start_pfn,
        unsigned long *zholes_size);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);

#ifdef CONFIG_NUMA
extern void setup_per_cpu_pageset(void);
#else
static inline void setup_per_cpu_pageset(void) {}
#endif

/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
        struct prio_tree_iter *iter);

#define vma_prio_tree_foreach(vma, iter, root, begin, end)      \
        for (prio_tree_iter_init(iter, root, begin, end), vma = NULL;  \
                (vma = vma_prio_tree_next(vma, iter)); )

static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
                                        struct list_head *list)
{
        vma->shared.vm_set.parent = NULL;
        list_add_tail(&vma->shared.vm_set.list, list);
}

/* mmap.c */
extern int __vm_enough_memory(long pages, int cap_sys_admin);
extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
        unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
extern struct vm_area_struct *vma_merge(struct mm_struct *,
        struct vm_area_struct *prev, unsigned long addr, unsigned long end,
        unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
        struct mempolicy *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int split_vma(struct mm_struct *,
        struct vm_area_struct *, unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
        struct rb_node **, struct rb_node *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
        unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);

extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);

extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long pgoff);

static inline unsigned long do_mmap(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long offset)
{
        unsigned long ret = -EINVAL;
        if ((offset + PAGE_ALIGN(len)) < offset)
                goto out;
        if (!(offset & ~PAGE_MASK))
                ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
out:
        return ret;
}

extern int do_munmap(struct mm_struct *, unsigned long, size_t);

extern unsigned long do_brk(unsigned long, unsigned long);

/* filemap.c */
extern unsigned long page_unuse(struct page *);
extern void truncate_inode_pages(struct address_space *, loff_t);
extern void truncate_inode_pages_range(struct address_space *,
                                       loff_t lstart, loff_t lend);

/* generic vm_area_ops exported for stackable file systems */
extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
extern int filemap_populate(struct vm_area_struct *, unsigned long,
                unsigned long, pgprot_t, unsigned long, int);

/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);

/* readahead.c */
#define VM_MAX_READAHEAD        128     /* kbytes */
#define VM_MIN_READAHEAD        16      /* kbytes (includes current page) */
#define VM_MAX_CACHE_HIT        256     /* max pages in a row in cache before
                                         * turning readahead off */

int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
                        pgoff_t offset, unsigned long nr_to_read);
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
                        pgoff_t offset, unsigned long nr_to_read);
unsigned long page_cache_readahead(struct address_space *mapping,
                          struct file_ra_state *ra,
                          struct file *filp,
                          pgoff_t offset,
                          unsigned long size);
void handle_ra_miss(struct address_space *mapping,
                    struct file_ra_state *ra, pgoff_t offset);
unsigned long max_sane_readahead(unsigned long nr);

/* Do stack extension */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
#ifdef CONFIG_IA64
extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
#endif

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
                                             struct vm_area_struct **pprev);

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   NULL if none.  Assume start_addr < end_addr. */
static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
{
        struct vm_area_struct * vma = find_vma(mm,start_addr);

        if (vma && end_addr <= vma->vm_start)
                vma = NULL;
        return vma;
}

static inline unsigned long vma_pages(struct vm_area_struct *vma)
{
        return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}

struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
struct page *vmalloc_to_page(void *addr);
unsigned long vmalloc_to_pfn(void *addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
                        unsigned long pfn, unsigned long size, pgprot_t);
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);

struct page *follow_page(struct vm_area_struct *, unsigned long address,
                        unsigned int foll_flags);
#define FOLL_WRITE      0x01    /* check pte is writable */
#define FOLL_TOUCH      0x02    /* mark page accessed */
#define FOLL_GET        0x04    /* do get_page on page */
#define FOLL_ANON       0x08    /* give ZERO_PAGE if no pgtable */

#ifdef CONFIG_PROC_FS
void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
#else
static inline void vm_stat_account(struct mm_struct *mm,
                        unsigned long flags, struct file *file, long pages)
{
}
#endif /* CONFIG_PROC_FS */

#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (!PageHighMem(page) && !enable)
                mutex_debug_check_no_locks_freed(page_address(page),
                                                 page_address(page + numpages));
}
#endif

extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
#ifdef __HAVE_ARCH_GATE_AREA
int in_gate_area_no_task(unsigned long addr);
int in_gate_area(struct task_struct *task, unsigned long addr);
#else
int in_gate_area_no_task(unsigned long addr);
#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
#endif  /* __HAVE_ARCH_GATE_AREA */

/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
#define OOM_DISABLE -17

int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
                                        void __user *, size_t *, loff_t *);
int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
                        unsigned long lru_pages);
void drop_pagecache(void);
void drop_slab(void);

#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
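
The shrinker prototype declared above (shrinker_t, set_shrinker(), remove_shrinker(), DEFAULT_SEEKS) describes a small registration protocol: the callback scans up to nr_to_scan objects and always reports how many remain, with nr_to_scan == 0 meaning "size query only". The following is an illustrative sketch against that 2.6-era interface, not part of mm.h; the example_* cache, counter and function names are made up.

/* Illustrative sketch only - example_* names are hypothetical, not kernel API. */
static atomic_t example_cache_size = ATOMIC_INIT(0);
static struct shrinker *example_shrinker;

static int example_cache_shrink(int nr_to_scan, gfp_t gfp_mask)
{
        if (nr_to_scan == 0)                    /* VM is only querying the cache size */
                return atomic_read(&example_cache_size);

        /* ... try to free up to nr_to_scan objects, decrementing the count ... */

        return atomic_read(&example_cache_size);        /* objects still in the cache */
}

static int example_cache_init(void)
{
        example_shrinker = set_shrinker(DEFAULT_SEEKS, example_cache_shrink);
        return example_shrinker ? 0 : -ENOMEM;
}

static void example_cache_exit(void)
{
        remove_shrinker(example_shrinker);
}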
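
The pte_lockptr()/pte_offset_map_lock()/pte_unmap_unlock() macros above hide whether this kernel was built with split per-page-table-page locks (the ptl field in struct page) or the single mm->page_table_lock. A minimal sketch of the usual usage pattern follows, assuming the caller already holds mmap_sem and has a valid pmd; the helper name is hypothetical and this is not part of mm.h.

/* Illustrative sketch only - not part of mm.h. */
static int example_pte_is_present(struct mm_struct *mm, pmd_t *pmd,
                                  unsigned long addr)
{
        spinlock_t *ptl;
        pte_t *pte;
        int ret;

        pte = pte_offset_map_lock(mm, pmd, addr, &ptl); /* map pte + take the right lock */
        ret = pte_present(*pte);                        /* inspect the entry under the lock */
        pte_unmap_unlock(pte, ptl);                     /* drop the lock and unmap */
        return ret;
}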
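
Finally, note that find_vma() above only guarantees addr < vm_end for the VMA it returns, so a caller still has to compare against vm_start before treating the address as mapped (find_vma_intersection() does the analogous check for a range). A short sketch of that idiom combined with vma_pages(); the function name is made up and the caller is assumed to hold mm->mmap_sem.

/* Illustrative sketch only - not part of mm.h. */
static unsigned long example_vma_size_at(struct mm_struct *mm, unsigned long addr)
{
        struct vm_area_struct *vma;

        vma = find_vma(mm, addr);
        if (!vma || addr < vma->vm_start)
                return 0;                               /* no VMA covers addr */
        return vma_pages(vma) << PAGE_SHIFT;            /* size of the covering VMA in bytes */
}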