Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
futexes: fix fault handling in futex_lock_pi

+73 -20
+73 -20
kernel/futex.c
···
1096 1096    * private futexes.
1097 1097    */
1098 1098   static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1099      -                                 struct task_struct *newowner)
1099      +                                 struct task_struct *newowner,
1100      +                                 struct rw_semaphore *fshared)
1100 1101   {
1101 1102           u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1102 1103           struct futex_pi_state *pi_state = q->pi_state;
1104      +         struct task_struct *oldowner = pi_state->owner;
1103 1105           u32 uval, curval, newval;
1104      -         int ret;
1106      +         int ret, attempt = 0;
1105 1107
1106 1108           /* Owner died? */
1109      +         if (!pi_state->owner)
1110      +                 newtid |= FUTEX_OWNER_DIED;
1111      +
1112      +         /*
1113      +          * We are here either because we stole the rtmutex from the
1114      +          * pending owner or we are the pending owner which failed to
1115      +          * get the rtmutex. We have to replace the pending owner TID
1116      +          * in the user space variable. This must be atomic as we have
1117      +          * to preserve the owner died bit here.
1118      +          *
1119      +          * Note: We write the user space value _before_ changing the
1120      +          * pi_state because we can fault here. Imagine swapped out
1121      +          * pages or a fork, which was running right before we acquired
1122      +          * mmap_sem, that marked all the anonymous memory readonly for
1123      +          * cow.
1124      +          *
1125      +          * Modifying pi_state _before_ the user space value would
1126      +          * leave the pi_state in an inconsistent state when we fault
1127      +          * here, because we need to drop the hash bucket lock to
1128      +          * handle the fault. This might be observed in the PID check
1129      +          * in lookup_pi_state.
1130      +          */
1131      + retry:
1132      +         if (get_futex_value_locked(&uval, uaddr))
1133      +                 goto handle_fault;
1134      +
1135      +         while (1) {
1136      +                 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1137      +
1138      +                 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1139      +
1140      +                 if (curval == -EFAULT)
1141      +                         goto handle_fault;
1142      +                 if (curval == uval)
1143      +                         break;
1144      +                 uval = curval;
1145      +         }
1146      +
1147      +         /*
1148      +          * We fixed up user space. Now we need to fix the pi_state
1149      +          * itself.
1150      +          */
1107 1151           if (pi_state->owner != NULL) {
1108 1152                   spin_lock_irq(&pi_state->owner->pi_lock);
1109 1153                   WARN_ON(list_empty(&pi_state->list));
1110 1154                   list_del_init(&pi_state->list);
1111 1155                   spin_unlock_irq(&pi_state->owner->pi_lock);
1112      -         } else
1113      -                 newtid |= FUTEX_OWNER_DIED;
1156      +         }
1114 1157
1115 1158           pi_state->owner = newowner;
1116 1159
···
1161 1118           WARN_ON(!list_empty(&pi_state->list));
1162 1119           list_add(&pi_state->list, &newowner->pi_state_list);
1163 1120           spin_unlock_irq(&newowner->pi_lock);
1121      +         return 0;
1164 1122
1165 1123           /*
1166      -          * We own it, so we have to replace the pending owner
1167      -          * TID. This must be atomic as we have preserve the
1168      -          * owner died bit here.
1124      +          * To handle the page fault we need to drop the hash bucket
1125      +          * lock here. That gives the other task (either the pending
1126      +          * owner itself or the task which stole the rtmutex) the
1127      +          * chance to try the fixup of the pi_state. So once we are
1128      +          * back from handling the fault we need to check the pi_state
1129      +          * after reacquiring the hash bucket lock and before trying to
1130      +          * do another fixup. When the fixup has been done already we
1131      +          * simply return.
1169 1132            */
1170      -         ret = get_futex_value_locked(&uval, uaddr);
1133      + handle_fault:
1134      +         spin_unlock(q->lock_ptr);
1171 1135
1172      -         while (!ret) {
1173      -                 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1136      +         ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
1174 1137
1175      -                 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1138      +         spin_lock(q->lock_ptr);
1176 1139
1177      -                 if (curval == -EFAULT)
1178      -                         ret = -EFAULT;
1179      -                 if (curval == uval)
1180      -                         break;
1181      -                 uval = curval;
1182      -         }
1183      -         return ret;
1140      +         /*
1141      +          * Check if someone else fixed it for us:
1142      +          */
1143      +         if (pi_state->owner != oldowner)
1144      +                 return 0;
1145      +
1146      +         if (ret)
1147      +                 return ret;
1148      +
1149      +         goto retry;
1184 1150   }
1185 1151
1186 1152   /*
···
1559 1507                    * that case:
1560 1508                    */
1561 1509                   if (q.pi_state->owner != curr)
1562      -                         ret = fixup_pi_state_owner(uaddr, &q, curr);
1510      +                         ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
1563 1511           } else {
1564 1512                   /*
1565 1513                    * Catch the rare case, where the lock was released
···
1591 1539                                   int res;
1592 1540
1593 1541                                   owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1594      -                                 res = fixup_pi_state_owner(uaddr, &q, owner);
1542      +                                 res = fixup_pi_state_owner(uaddr, &q, owner,
1543      +                                                            fshared);
1595 1544
1596 1545                                   /* propagate -EFAULT, if the fixup failed */
1597 1546                                   if (res)