Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at 1cdca61bf8537043edde8ef784ce1a1351361dac (1010 lines, 35 kB)
#ifndef _LINUX_MM_H
#define _LINUX_MM_H

#include <linux/sched.h>
#include <linux/errno.h>

#ifdef __KERNEL__

#include <linux/config.h>
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mmzone.h>
#include <linux/rbtree.h>
#include <linux/prio_tree.h>
#include <linux/fs.h>

struct mempolicy;
struct anon_vma;

#ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;
#endif

extern unsigned long num_physpages;
extern void * high_memory;
extern unsigned long vmalloc_earlyreserve;
extern int page_cluster;

#ifdef CONFIG_SYSCTL
extern int sysctl_legacy_va_layout;
#else
#define sysctl_legacy_va_layout 0
#endif

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/atomic.h>

#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))

/*
 * Linux kernel virtual memory manager primitives.
 * The idea being to have a "virtual" mm in the same way
 * we have a virtual fs - giving a cleaner interface to the
 * mm details, and allowing different kinds of memory mappings
 * (from shared memory to executable loading to arbitrary
 * mmap() functions).
 */

/*
 * This struct defines a memory VMM memory area. There is one of these
 * per VM-area/task.  A VM area is any part of the process virtual memory
 * space that has a special rule for the page-fault handlers (ie a shared
 * library, the executable area etc).
 */
struct vm_area_struct {
        struct mm_struct * vm_mm;       /* The address space we belong to. */
        unsigned long vm_start;         /* Our start address within vm_mm. */
        unsigned long vm_end;           /* The first byte after our end address
                                           within vm_mm. */

        /* linked list of VM areas per task, sorted by address */
        struct vm_area_struct *vm_next;

        pgprot_t vm_page_prot;          /* Access permissions of this VMA. */
        unsigned long vm_flags;         /* Flags, listed below. */

        struct rb_node vm_rb;

        /*
         * For areas with an address space and backing store,
         * linkage into the address_space->i_mmap prio tree, or
         * linkage to the list of like vmas hanging off its node, or
         * linkage of vma in the address_space->i_mmap_nonlinear list.
         */
        union {
                struct {
                        struct list_head list;
                        void *parent;   /* aligns with prio_tree_node parent */
                        struct vm_area_struct *head;
                } vm_set;

                struct raw_prio_tree_node prio_tree_node;
        } shared;

        /*
         * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
         * list, after a COW of one of the file pages.  A MAP_SHARED vma
         * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
         * or brk vma (with NULL file) can only be in an anon_vma list.
         */
        struct list_head anon_vma_node; /* Serialized by anon_vma->lock */
        struct anon_vma *anon_vma;      /* Serialized by page_table_lock */

        /* Function pointers to deal with this struct. */
        struct vm_operations_struct * vm_ops;

        /* Information about our backing store: */
        unsigned long vm_pgoff;         /* Offset (within vm_file) in PAGE_SIZE
                                           units, *not* PAGE_CACHE_SIZE */
        struct file * vm_file;          /* File we map to (can be NULL). */
        void * vm_private_data;         /* was vm_pte (shared mem) */
        unsigned long vm_truncate_count;/* truncate_count or restart_addr */

#ifndef CONFIG_MMU
        atomic_t vm_usage;              /* refcount (VMAs shared if !MMU) */
#endif
#ifdef CONFIG_NUMA
        struct mempolicy *vm_policy;    /* NUMA policy for the VMA */
#endif
};

/*
 * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
 * disabled, then there's a single shared list of VMAs maintained by the
 * system, and mm's subscribe to these individually
 */
struct vm_list_struct {
        struct vm_list_struct   *next;
        struct vm_area_struct   *vma;
};

#ifndef CONFIG_MMU
extern struct rb_root nommu_vma_tree;
extern struct rw_semaphore nommu_vma_sem;

extern unsigned int kobjsize(const void *objp);
#endif

/*
 * vm_flags..
 */
#define VM_READ         0x00000001      /* currently active flags */
#define VM_WRITE        0x00000002
#define VM_EXEC         0x00000004
#define VM_SHARED       0x00000008

/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
#define VM_MAYREAD      0x00000010      /* limits for mprotect() etc */
#define VM_MAYWRITE     0x00000020
#define VM_MAYEXEC      0x00000040
#define VM_MAYSHARE     0x00000080

#define VM_GROWSDOWN    0x00000100      /* general info on the segment */
#define VM_GROWSUP      0x00000200
#define VM_SHM          0x00000400      /* shared memory area, don't swap out */
#define VM_DENYWRITE    0x00000800      /* ETXTBSY on write attempts.. */

#define VM_EXECUTABLE   0x00001000
#define VM_LOCKED       0x00002000
#define VM_IO           0x00004000      /* Memory mapped I/O or similar */

                                        /* Used by sys_madvise() */
#define VM_SEQ_READ     0x00008000      /* App will access data sequentially */
#define VM_RAND_READ    0x00010000      /* App will not benefit from clustered reads */

#define VM_DONTCOPY     0x00020000      /* Do not copy this vma on fork */
#define VM_DONTEXPAND   0x00040000      /* Cannot expand with mremap() */
#define VM_RESERVED     0x00080000      /* Pages managed in a special way */
#define VM_ACCOUNT      0x00100000      /* Is a VM accounted object */
#define VM_HUGETLB      0x00400000      /* Huge TLB Page VM */
#define VM_NONLINEAR    0x00800000      /* Is non-linear (remap_file_pages) */
#define VM_MAPPED_COPY  0x01000000      /* T if mapped copy of data (nommu mmap) */

#ifndef VM_STACK_DEFAULT_FLAGS          /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif

#ifdef CONFIG_STACK_GROWSUP
#define VM_STACK_FLAGS  (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
#else
#define VM_STACK_FLAGS  (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
#endif

#define VM_READHINTMASK                 (VM_SEQ_READ | VM_RAND_READ)
#define VM_ClearReadHint(v)             (v)->vm_flags &= ~VM_READHINTMASK
#define VM_NormalReadHint(v)            (!((v)->vm_flags & VM_READHINTMASK))
#define VM_SequentialReadHint(v)        ((v)->vm_flags & VM_SEQ_READ)
#define VM_RandomReadHint(v)            ((v)->vm_flags & VM_RAND_READ)

/*
 * mapping from the currently active vm_flags protection bits (the
 * low four bits) to a page protection mask..
 */
extern pgprot_t protection_map[16];


/*
 * These are the virtual MM functions - opening of an area, closing and
 * unmapping it (needed to keep files on disk up-to-date etc), pointer
 * to the functions called when a no-page or a wp-page exception occurs.
 */
struct vm_operations_struct {
        void (*open)(struct vm_area_struct * area);
        void (*close)(struct vm_area_struct * area);
        struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
        int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
#ifdef CONFIG_NUMA
        int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
        struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
                                        unsigned long addr);
#endif
};

struct mmu_gather;
struct inode;

/*
 * Each physical page in the system has a struct page associated with
 * it to keep track of whatever it is we are using the page for at the
 * moment. Note that we have no way to track which tasks are using
 * a page.
 */
struct page {
        unsigned long flags;            /* Atomic flags, some possibly
                                         * updated asynchronously */
        atomic_t _count;                /* Usage count, see below. */
        atomic_t _mapcount;             /* Count of ptes mapped in mms,
                                         * to show when page is mapped
                                         * & limit reverse map searches.
                                         */
        union {
                unsigned long private;  /* Mapping-private opaque data:
                                         * usually used for buffer_heads
                                         * if PagePrivate set; used for
                                         * swp_entry_t if PageSwapCache
                                         * When page is free, this indicates
                                         * order in the buddy system.
                                         */
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
                spinlock_t ptl;
#endif
        } u;
        struct address_space *mapping;  /* If low bit clear, points to
                                         * inode address_space, or NULL.
                                         * If page mapped as anonymous
                                         * memory, low bit is set, and
                                         * it points to anon_vma object:
                                         * see PAGE_MAPPING_ANON below.
                                         */
        pgoff_t index;                  /* Our offset within mapping. */
        struct list_head lru;           /* Pageout list, eg. active_list
                                         * protected by zone->lru_lock !
                                         */
        /*
         * On machines where all RAM is mapped into kernel address space,
         * we can simply calculate the virtual address. On machines with
         * highmem some memory is mapped into kernel virtual memory
         * dynamically, so we need a place to store that address.
         * Note that this field could be 16 bits on x86 ... ;)
         *
         * Architectures with slow multiplication can define
         * WANT_PAGE_VIRTUAL in asm/page.h
         */
#if defined(WANT_PAGE_VIRTUAL)
        void *virtual;                  /* Kernel virtual address (NULL if
                                           not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
};

#define page_private(page)              ((page)->u.private)
#define set_page_private(page, v)       ((page)->u.private = (v))

/*
 * FIXME: take this include out, include page-flags.h in
 * files which need it (119 of them)
 */
#include <linux/page-flags.h>

/*
 * Methods to modify the page usage count.
 *
 * What counts for a page usage:
 * - cache mapping   (page->mapping)
 * - private data    (page->private)
 * - page mapped in a task's page tables, each mapping
 *   is counted separately
 *
 * Also, many kernel routines increase the page count before a critical
 * routine so they can be sure the page doesn't go away from under them.
 *
 * Since 2.6.6 (approx), a free page has ->_count = -1.  This is so that we
 * can use atomic_add_negative(-1, page->_count) to detect when the page
 * becomes free and so that we can also use atomic_inc_and_test to atomically
 * detect when we just tried to grab a ref on a page which some other CPU has
 * already deemed to be freeable.
 *
 * NO code should make assumptions about this internal detail!  Use the
 * provided macros which retain the old rules: page_count(page) == 0 is a
 * free page.
 */

/*
 * Drop a ref, return true if the logical refcount fell to zero (the page has
 * no users)
 */
#define put_page_testzero(p)                            \
        ({                                              \
                BUG_ON(page_count(p) == 0);             \
                atomic_add_negative(-1, &(p)->_count);  \
        })

/*
 * Grab a ref, return true if the page previously had a logical refcount of
 * zero.  ie: returns true if we just grabbed an already-deemed-to-be-free page
 */
#define get_page_testone(p)     atomic_inc_and_test(&(p)->_count)

#define set_page_count(p,v)     atomic_set(&(p)->_count, v - 1)
#define __put_page(p)           atomic_dec(&(p)->_count)

extern void FASTCALL(__page_cache_release(struct page *));

#ifdef CONFIG_HUGETLB_PAGE

static inline int page_count(struct page *page)
{
        if (PageCompound(page))
                page = (struct page *)page_private(page);
        return atomic_read(&page->_count) + 1;
}

static inline void get_page(struct page *page)
{
        if (unlikely(PageCompound(page)))
                page = (struct page *)page_private(page);
        atomic_inc(&page->_count);
}

void put_page(struct page *page);

#else           /* CONFIG_HUGETLB_PAGE */

#define page_count(p)           (atomic_read(&(p)->_count) + 1)

static inline void get_page(struct page *page)
{
        atomic_inc(&page->_count);
}

static inline void put_page(struct page *page)
{
        if (put_page_testzero(page))
                __page_cache_release(page);
}

#endif          /* CONFIG_HUGETLB_PAGE */

/*
 * Multiple processes may "see" the same page. E.g. for untouched
 * mappings of /dev/null, all processes see the same page full of
 * zeroes, and text pages of executables and shared libraries have
 * only one copy in memory, at most, normally.
 *
 * For the non-reserved pages, page_count(page) denotes a reference count.
 *   page_count() == 0 means the page is free. page->lru is then used for
 *   freelist management in the buddy allocator.
 *   page_count() == 1 means the page is used for exactly one purpose
 *   (e.g. a private data page of one process).
 *
 * A page may be used for kmalloc() or anyone else who does a
 * __get_free_page(). In this case the page_count() is at least 1, and
 * all other fields are unused but should be 0 or NULL. The
 * management of this page is the responsibility of the one who uses
 * it.
 *
 * The other pages (we may call them "process pages") are completely
 * managed by the Linux memory manager: I/O, buffers, swapping etc.
 * The following discussion applies only to them.
 *
 * A page may belong to an inode's memory mapping. In this case,
 * page->mapping is the pointer to the inode, and page->index is the
 * file offset of the page, in units of PAGE_CACHE_SIZE.
 *
 * A page contains an opaque `private' member, which belongs to the
 * page's address_space. Usually, this is the address of a circular
 * list of the page's disk buffers.
 *
 * For pages belonging to inodes, the page_count() is the number of
 * attaches, plus 1 if `private' contains something, plus one for
 * the page cache itself.
 *
 * Instead of keeping dirty/clean pages in per address-space lists, we instead
 * now tag pages as dirty/under writeback in the radix tree.
 *
 * There is also a per-mapping radix tree mapping index to the page
 * in memory if present. The tree is rooted at mapping->root.
 *
 * All process pages can do I/O:
 * - inode pages may need to be read from disk,
 * - inode pages which have been modified and are MAP_SHARED may need
 *   to be written to disk,
 * - private pages which have been modified may need to be swapped out
 *   to swap space and (later) to be read back into memory.
 */

/*
 * The zone field is never updated after free_area_init_core()
 * sets it, so none of the operations on it need to be atomic.
 */


/*
 * page->flags layout:
 *
 * There are three possibilities for how page->flags get
 * laid out.  The first is for the normal case, without
 * sparsemem.  The second is for sparsemem when there is
 * plenty of space for node and section.  The last is when
 * we have run out of space and have to fall back to an
 * alternate (slower) way of determining the node.
 *
 *        No sparsemem: |       NODE     | ZONE | ... | FLAGS |
 * with space for node: | SECTION | NODE | ZONE | ... | FLAGS |
 *   no space for node: | SECTION |     ZONE    | ... | FLAGS |
 */
#ifdef CONFIG_SPARSEMEM
#define SECTIONS_WIDTH          SECTIONS_SHIFT
#else
#define SECTIONS_WIDTH          0
#endif

#define ZONES_WIDTH             ZONES_SHIFT

#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED
#define NODES_WIDTH             NODES_SHIFT
#else
#define NODES_WIDTH             0
#endif

/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
#define SECTIONS_PGOFF          ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
#define NODES_PGOFF             (SECTIONS_PGOFF - NODES_WIDTH)
#define ZONES_PGOFF             (NODES_PGOFF - ZONES_WIDTH)

/*
 * We are going to use the flags for the page to node mapping if its in
 * there.  This includes the case where there is no node, so it is implicit.
 */
#define FLAGS_HAS_NODE          (NODES_WIDTH > 0 || NODES_SHIFT == 0)

#ifndef PFN_SECTION_SHIFT
#define PFN_SECTION_SHIFT 0
#endif

/*
 * Define the bit shifts to access each section.  For non-existant
 * sections we define the shift as 0; that plus a 0 mask ensures
 * the compiler will optimise away reference to them.
 */
#define SECTIONS_PGSHIFT        (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0))
#define NODES_PGSHIFT           (NODES_PGOFF * (NODES_WIDTH != 0))
#define ZONES_PGSHIFT           (ZONES_PGOFF * (ZONES_WIDTH != 0))

/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */
#if FLAGS_HAS_NODE
#define ZONETABLE_SHIFT         (NODES_SHIFT + ZONES_SHIFT)
#else
#define ZONETABLE_SHIFT         (SECTIONS_SHIFT + ZONES_SHIFT)
#endif
#define ZONETABLE_PGSHIFT       ZONES_PGSHIFT

#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED
#endif

#define ZONES_MASK              ((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK              ((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK           ((1UL << SECTIONS_WIDTH) - 1)
#define ZONETABLE_MASK          ((1UL << ZONETABLE_SHIFT) - 1)

static inline unsigned long page_zonenum(struct page *page)
{
        return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}

struct zone;
extern struct zone *zone_table[];

static inline struct zone *page_zone(struct page *page)
{
        return zone_table[(page->flags >> ZONETABLE_PGSHIFT) &
                        ZONETABLE_MASK];
}

static inline unsigned long page_to_nid(struct page *page)
{
        if (FLAGS_HAS_NODE)
                return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
        else
                return page_zone(page)->zone_pgdat->node_id;
}
static inline unsigned long page_to_section(struct page *page)
{
        return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
}

static inline void set_page_zone(struct page *page, unsigned long zone)
{
        page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
        page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
}
static inline void set_page_node(struct page *page, unsigned long node)
{
        page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
        page->flags |= (node & NODES_MASK) << NODES_PGSHIFT;
}
static inline void set_page_section(struct page *page, unsigned long section)
{
        page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT);
        page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
}

static inline void set_page_links(struct page *page, unsigned long zone,
        unsigned long node, unsigned long pfn)
{
        set_page_zone(page, zone);
        set_page_node(page, node);
        set_page_section(page, pfn_to_section_nr(pfn));
}

#ifndef CONFIG_DISCONTIGMEM
/* The array of struct pages - for discontigmem use pgdat->lmem_map */
extern struct page *mem_map;
#endif

static inline void *lowmem_page_address(struct page *page)
{
        return __va(page_to_pfn(page) << PAGE_SHIFT);
}

#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif

#if defined(WANT_PAGE_VIRTUAL)
#define page_address(page) ((page)->virtual)
#define set_page_address(page, address)                 \
        do {                                            \
                (page)->virtual = (address);            \
        } while(0)
#define page_address_init()  do { } while(0)
#endif

#if defined(HASHED_PAGE_VIRTUAL)
void *page_address(struct page *page);
void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif

#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address)  do { } while(0)
#define page_address_init()  do { } while(0)
#endif

/*
 * On an anonymous page mapped into a user virtual memory area,
 * page->mapping points to its anon_vma, not to a struct address_space;
 * with the PAGE_MAPPING_ANON bit set to distinguish it.
 *
 * Please note that, confusingly, "page_mapping" refers to the inode
 * address_space which maps the page from disk; whereas "page_mapped"
 * refers to user virtual address space into which the page is mapped.
 */
#define PAGE_MAPPING_ANON       1

extern struct address_space swapper_space;
static inline struct address_space *page_mapping(struct page *page)
{
        struct address_space *mapping = page->mapping;

        if (unlikely(PageSwapCache(page)))
                mapping = &swapper_space;
        else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
                mapping = NULL;
        return mapping;
}

static inline int PageAnon(struct page *page)
{
        return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
}

/*
 * Return the pagecache index of the passed page.  Regular pagecache pages
 * use ->index whereas swapcache pages use ->private
 */
static inline pgoff_t page_index(struct page *page)
{
        if (unlikely(PageSwapCache(page)))
                return page_private(page);
        return page->index;
}

/*
 * The atomic page->_mapcount, like _count, starts from -1:
 * so that transitions both from it and to it can be tracked,
 * using atomic_inc_and_test and atomic_add_negative(-1).
 */
static inline void reset_page_mapcount(struct page *page)
{
        atomic_set(&(page)->_mapcount, -1);
}

static inline int page_mapcount(struct page *page)
{
        return atomic_read(&(page)->_mapcount) + 1;
}

/*
 * Return true if this page is mapped into pagetables.
 */
static inline int page_mapped(struct page *page)
{
        return atomic_read(&(page)->_mapcount) >= 0;
}

/*
 * Error return values for the *_nopage functions
 */
#define NOPAGE_SIGBUS   (NULL)
#define NOPAGE_OOM      ((struct page *) (-1))

/*
 * Different kinds of faults, as returned by handle_mm_fault().
 * Used to decide whether a process gets delivered SIGBUS or
 * just gets major/minor fault counters bumped up.
 */
#define VM_FAULT_OOM    0x00
#define VM_FAULT_SIGBUS 0x01
#define VM_FAULT_MINOR  0x02
#define VM_FAULT_MAJOR  0x03

/*
 * Special case for get_user_pages.
 * Must be in a distinct bit from the above VM_FAULT_ flags.
 */
#define VM_FAULT_WRITE  0x10

#define offset_in_page(p)       ((unsigned long)(p) & ~PAGE_MASK)

extern void show_free_areas(void);

#ifdef CONFIG_SHMEM
struct page *shmem_nopage(struct vm_area_struct *vma,
                        unsigned long address, int *type);
int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
                                        unsigned long addr);
int shmem_lock(struct file *file, int lock, struct user_struct *user);
#else
#define shmem_nopage filemap_nopage
#define shmem_lock(a, b, c)     ({0;})  /* always in memory, no need to lock */
#define shmem_set_policy(a, b)  (0)
#define shmem_get_policy(a, b)  (NULL)
#endif
struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);

int shmem_zero_setup(struct vm_area_struct *);

static inline int can_do_mlock(void)
{
        if (capable(CAP_IPC_LOCK))
                return 1;
        if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
                return 1;
        return 0;
}
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);

/*
 * Parameter block passed down to zap_pte_range in exceptional cases.
 */
struct zap_details {
        struct vm_area_struct *nonlinear_vma;   /* Check page->index if set */
        struct address_space *check_mapping;    /* Check page->mapping if set */
        pgoff_t first_index;                    /* Lowest page->index to unmap */
        pgoff_t last_index;                     /* Highest page->index to unmap */
        spinlock_t *i_mmap_lock;                /* For unmap_mapping_range: */
        unsigned long truncate_count;           /* Compare vm_truncate_count */
};

unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *);
unsigned long unmap_vmas(struct mmu_gather **tlb,
                struct vm_area_struct *start_vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *);
void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
                unsigned long end, unsigned long floor, unsigned long ceiling);
void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
                unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma);
int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
                        unsigned long size, pgprot_t prot);
void unmap_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen, int even_cows);

static inline void unmap_shared_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen)
{
        unmap_mapping_range(mapping, holebegin, holelen, 0);
}

extern int vmtruncate(struct inode * inode, loff_t offset);
extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);

static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
{
        return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
}

extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);

int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
                int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);

int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
                                struct page *page);
int FASTCALL(set_page_dirty(struct page *page));
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);

extern unsigned long do_mremap(unsigned long addr,
                               unsigned long old_len, unsigned long new_len,
                               unsigned long flags, unsigned long new_addr);

/*
 * Prototype to add a shrinker callback for ageable caches.
 *
 * These functions are passed a count `nr_to_scan' and a gfpmask.  They should
 * scan `nr_to_scan' objects, attempting to free them.
 *
 * The callback must return the number of objects which remain in the cache.
 *
 * The callback will be passed nr_to_scan == 0 when the VM is querying the
 * cache size, so a fastpath for that case is appropriate.
 */
typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);

/*
 * Add an aging callback.  The int is the number of 'seeks' it takes
 * to recreate one of the objects that these functions age.
 */

#define DEFAULT_SEEKS 2
struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);

int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);

/*
 * The following ifdef needed to get the 4level-fixup.h header to work.
 * Remove it when 4level-fixup.h has been removed.
 */
#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
        return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
                NULL: pud_offset(pgd, address);
}

static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
        return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
                NULL: pmd_offset(pud, address);
}
#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
/*
 * We tuck a spinlock to guard each pagetable page into its struct page,
 * at page->private, with BUILD_BUG_ON to make sure that this will not
 * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
 * When freeing, reset page->mapping so free_pages_check won't complain.
 */
#define __pte_lockptr(page)     &((page)->u.ptl)
#define pte_lock_init(_page)    do {                                    \
        spin_lock_init(__pte_lockptr(_page));                           \
} while (0)
#define pte_lock_deinit(page)   ((page)->mapping = NULL)
#define pte_lockptr(mm, pmd)    ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
#else
/*
 * We use mm->page_table_lock to guard all pagetable pages of the mm.
 */
#define pte_lock_init(page)     do {} while (0)
#define pte_lock_deinit(page)   do {} while (0)
#define pte_lockptr(mm, pmd)    ({(void)(pmd); &(mm)->page_table_lock;})
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */

#define pte_offset_map_lock(mm, pmd, address, ptlp)     \
({                                                      \
        spinlock_t *__ptl = pte_lockptr(mm, pmd);       \
        pte_t *__pte = pte_offset_map(pmd, address);    \
        *(ptlp) = __ptl;                                \
        spin_lock(__ptl);                               \
        __pte;                                          \
})

#define pte_unmap_unlock(pte, ptl)      do {            \
        spin_unlock(ptl);                               \
        pte_unmap(pte);                                 \
} while (0)

#define pte_alloc_map(mm, pmd, address)                 \
        ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
                NULL: pte_offset_map(pmd, address))

#define pte_alloc_map_lock(mm, pmd, address, ptlp)      \
        ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
                NULL: pte_offset_map_lock(mm, pmd, address, ptlp))

#define pte_alloc_kernel(pmd, address)                  \
        ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
                NULL: pte_offset_kernel(pmd, address))

extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat,
        unsigned long * zones_size, unsigned long zone_start_pfn,
        unsigned long *zholes_size);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);

#ifdef CONFIG_NUMA
extern void setup_per_cpu_pageset(void);
#else
static inline void setup_per_cpu_pageset(void) {}
#endif

/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
        struct prio_tree_iter *iter);

#define vma_prio_tree_foreach(vma, iter, root, begin, end)      \
        for (prio_tree_iter_init(iter, root, begin, end), vma = NULL;  \
                (vma = vma_prio_tree_next(vma, iter)); )

static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
                                        struct list_head *list)
{
        vma->shared.vm_set.parent = NULL;
        list_add_tail(&vma->shared.vm_set.list, list);
}

/* mmap.c */
extern int __vm_enough_memory(long pages, int cap_sys_admin);
extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
        unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
extern struct vm_area_struct *vma_merge(struct mm_struct *,
        struct vm_area_struct *prev, unsigned long addr, unsigned long end,
        unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
        struct mempolicy *);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int split_vma(struct mm_struct *,
        struct vm_area_struct *, unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
        struct rb_node **, struct rb_node *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
        unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);

extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);

extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long pgoff);

static inline unsigned long do_mmap(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot,
        unsigned long flag, unsigned long offset)
{
        unsigned long ret = -EINVAL;
        if ((offset + PAGE_ALIGN(len)) < offset)
                goto out;
        if (!(offset & ~PAGE_MASK))
                ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
out:
        return ret;
}

extern int do_munmap(struct mm_struct *, unsigned long, size_t);

extern unsigned long do_brk(unsigned long, unsigned long);

/* filemap.c */
extern unsigned long page_unuse(struct page *);
extern void truncate_inode_pages(struct address_space *, loff_t);

/* generic vm_area_ops exported for stackable file systems */
extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
extern int filemap_populate(struct vm_area_struct *, unsigned long,
                unsigned long, pgprot_t, unsigned long, int);

/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);

/* readahead.c */
#define VM_MAX_READAHEAD        128     /* kbytes */
#define VM_MIN_READAHEAD        16      /* kbytes (includes current page) */
#define VM_MAX_CACHE_HIT        256     /* max pages in a row in cache before
                                         * turning readahead off */

int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
                        pgoff_t offset, unsigned long nr_to_read);
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
                        pgoff_t offset, unsigned long nr_to_read);
unsigned long page_cache_readahead(struct address_space *mapping,
                          struct file_ra_state *ra,
                          struct file *filp,
                          pgoff_t offset,
                          unsigned long size);
void handle_ra_miss(struct address_space *mapping,
                    struct file_ra_state *ra, pgoff_t offset);
unsigned long max_sane_readahead(unsigned long nr);

/* Do stack extension */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
#ifdef CONFIG_IA64
extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
#endif

/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
                                             struct vm_area_struct **pprev);

/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   NULL if none.  Assume start_addr < end_addr. */
static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
{
        struct vm_area_struct * vma = find_vma(mm,start_addr);

        if (vma && end_addr <= vma->vm_start)
                vma = NULL;
        return vma;
}

static inline unsigned long vma_pages(struct vm_area_struct *vma)
{
        return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}

struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
struct page *vmalloc_to_page(void *addr);
unsigned long vmalloc_to_pfn(void *addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
                        unsigned long pfn, unsigned long size, pgprot_t);

struct page *follow_page(struct mm_struct *, unsigned long address,
                        unsigned int foll_flags);
#define FOLL_WRITE      0x01    /* check pte is writable */
#define FOLL_TOUCH      0x02    /* mark page accessed */
#define FOLL_GET        0x04    /* do get_page on page */
#define FOLL_ANON       0x08    /* give ZERO_PAGE if no pgtable */

#ifdef CONFIG_PROC_FS
void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
#else
static inline void vm_stat_account(struct mm_struct *mm,
                        unsigned long flags, struct file *file, long pages)
{
}
#endif /* CONFIG_PROC_FS */

#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
{
}
#endif

extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
#ifdef __HAVE_ARCH_GATE_AREA
int in_gate_area_no_task(unsigned long addr);
int in_gate_area(struct task_struct *task, unsigned long addr);
#else
int in_gate_area_no_task(unsigned long addr);
#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
#endif  /* __HAVE_ARCH_GATE_AREA */

/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
#define OOM_DISABLE -17

#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
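
The set_page_zone()/set_page_node()/set_page_section() helpers in the header pack a zone index, a node id and (with sparsemem) a section number into the upper bits of page->flags using a shift-and-mask scheme, with the widths checked against FLAGS_RESERVED at build time. The standalone userspace sketch below reproduces that packing pattern only; the field widths and the set_zone()/set_node() helper names are illustrative assumptions, not the kernel's build-time values derived from ZONES_SHIFT, NODES_SHIFT and SECTIONS_SHIFT.

/*
 * Minimal sketch of the page->flags field-packing scheme, assuming
 * illustrative widths (2 zone bits, 6 node bits).  Not kernel code.
 */
#include <stdio.h>

#define ZONES_WIDTH     2
#define NODES_WIDTH     6

/* Highest bits hold the node, the zone sits just below it. */
#define NODES_PGOFF     ((sizeof(unsigned long) * 8) - NODES_WIDTH)
#define ZONES_PGOFF     (NODES_PGOFF - ZONES_WIDTH)

#define ZONES_MASK      ((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK      ((1UL << NODES_WIDTH) - 1)

static void set_zone(unsigned long *flags, unsigned long zone)
{
        *flags &= ~(ZONES_MASK << ZONES_PGOFF);         /* clear old zone bits */
        *flags |= (zone & ZONES_MASK) << ZONES_PGOFF;   /* install new zone */
}

static void set_node(unsigned long *flags, unsigned long node)
{
        *flags &= ~(NODES_MASK << NODES_PGOFF);         /* clear old node bits */
        *flags |= (node & NODES_MASK) << NODES_PGOFF;   /* install new node */
}

int main(void)
{
        unsigned long flags = 0;        /* stands in for page->flags */

        set_zone(&flags, 1);            /* hypothetical zone index */
        set_node(&flags, 3);            /* hypothetical NUMA node */

        printf("zone = %lu, node = %lu\n",
               (flags >> ZONES_PGOFF) & ZONES_MASK,
               (flags >> NODES_PGOFF) & NODES_MASK);
        return 0;
}

Packing these fields into the existing flags word avoids widening struct page; lookups such as page_zonenum() and page_to_nid() reduce to a shift and a mask, and the #error guard on SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH ensures the packed fields never collide with the atomic flag bits.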