+49
-45
arch/parisc/kernel/perf.c
+49
-45
arch/parisc/kernel/perf.c
···
39
39
* the PDC INTRIGUE calls. This is done to eliminate bugs introduced
40
40
* in various PDC revisions. The code is much more maintainable
41
41
* and reliable this way vs having to debug on every version of PDC
42
-
* on every box.
42
+
* on every box.
43
43
*/
44
44
45
45
#include <linux/capability.h>
···
195
195
static int perf_release(struct inode *inode, struct file *file);
196
196
static int perf_open(struct inode *inode, struct file *file);
197
197
static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
198
-
static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
199
-
loff_t *ppos);
198
+
static ssize_t perf_write(struct file *file, const char __user *buf,
199
+
size_t count, loff_t *ppos);
200
200
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
201
201
static void perf_start_counters(void);
202
202
static int perf_stop_counters(uint32_t *raddr);
···
222
222
/*
223
223
* configure:
224
224
*
225
-
* Configure the cpu with a given data image. First turn off the counters,
225
+
* Configure the cpu with a given data image. First turn off the counters,
226
226
* then download the image, then turn the counters back on.
227
227
*/
228
228
static int perf_config(uint32_t *image_ptr)
···
234
234
error = perf_stop_counters(raddr);
235
235
if (error != 0) {
236
236
printk("perf_config: perf_stop_counters = %ld\n", error);
237
-
return -EINVAL;
237
+
return -EINVAL;
238
238
}
239
239
240
240
printk("Preparing to write image\n");
···
242
242
error = perf_write_image((uint64_t *)image_ptr);
243
243
if (error != 0) {
244
244
printk("perf_config: DOWNLOAD = %ld\n", error);
245
-
return -EINVAL;
245
+
return -EINVAL;
246
246
}
247
247
248
248
printk("Preparing to start counters\n");
···
254
254
}
255
255
256
256
/*
257
-
* Open the device and initialize all of its memory. The device is only
257
+
* Open the device and initialize all of its memory. The device is only
258
258
* opened once, but can be "queried" by multiple processes that know its
259
259
* file descriptor.
260
260
*/
···
298
298
* called on the processor that the download should happen
299
299
* on.
300
300
*/
301
-
static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
302
-
loff_t *ppos)
301
+
static ssize_t perf_write(struct file *file, const char __user *buf,
302
+
size_t count, loff_t *ppos)
303
303
{
304
304
size_t image_size;
305
305
uint32_t image_type;
306
306
uint32_t interface_type;
307
307
uint32_t test;
308
308
309
-
if (perf_processor_interface == ONYX_INTF)
309
+
if (perf_processor_interface == ONYX_INTF)
310
310
image_size = PCXU_IMAGE_SIZE;
311
-
else if (perf_processor_interface == CUDA_INTF)
311
+
else if (perf_processor_interface == CUDA_INTF)
312
312
image_size = PCXW_IMAGE_SIZE;
313
-
else
313
+
else
314
314
return -EFAULT;
315
315
316
316
if (!capable(CAP_SYS_ADMIN))
···
330
330
331
331
/* First check the machine type is correct for
332
332
the requested image */
333
-
if (((perf_processor_interface == CUDA_INTF) &&
334
-
(interface_type != CUDA_INTF)) ||
335
-
((perf_processor_interface == ONYX_INTF) &&
336
-
(interface_type != ONYX_INTF)))
333
+
if (((perf_processor_interface == CUDA_INTF) &&
334
+
(interface_type != CUDA_INTF)) ||
335
+
((perf_processor_interface == ONYX_INTF) &&
336
+
(interface_type != ONYX_INTF)))
337
337
return -EINVAL;
338
338
339
339
/* Next check to make sure the requested image
340
340
is valid */
341
-
if (((interface_type == CUDA_INTF) &&
341
+
if (((interface_type == CUDA_INTF) &&
342
342
(test >= MAX_CUDA_IMAGES)) ||
343
-
((interface_type == ONYX_INTF) &&
344
-
(test >= MAX_ONYX_IMAGES)))
343
+
((interface_type == ONYX_INTF) &&
344
+
(test >= MAX_ONYX_IMAGES)))
345
345
return -EINVAL;
346
346
347
347
/* Copy the image into the processor */
348
-
if (interface_type == CUDA_INTF)
348
+
if (interface_type == CUDA_INTF)
349
349
return perf_config(cuda_images[test]);
350
350
else
351
351
return perf_config(onyx_images[test]);
···
359
359
static void perf_patch_images(void)
360
360
{
361
361
#if 0 /* FIXME!! */
362
-
/*
362
+
/*
363
363
* NOTE: this routine is VERY specific to the current TLB image.
364
364
* If the image is changed, this routine might also need to be changed.
365
365
*/
···
367
367
extern void $i_dtlb_miss_2_0();
368
368
extern void PA2_0_iva();
369
369
370
-
/*
370
+
/*
371
371
* We can only use the lower 32-bits, the upper 32-bits should be 0
372
-
* anyway given this is in the kernel
372
+
* anyway given this is in the kernel
373
373
*/
374
374
uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0);
375
375
uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0);
···
377
377
378
378
if (perf_processor_interface == ONYX_INTF) {
379
379
/* clear last 2 bytes */
380
-
onyx_images[TLBMISS][15] &= 0xffffff00;
380
+
onyx_images[TLBMISS][15] &= 0xffffff00;
381
381
/* set 2 bytes */
382
382
onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
383
383
onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
384
384
onyx_images[TLBMISS][17] = itlb_addr;
385
385
386
386
/* clear last 2 bytes */
387
-
onyx_images[TLBHANDMISS][15] &= 0xffffff00;
387
+
onyx_images[TLBHANDMISS][15] &= 0xffffff00;
388
388
/* set 2 bytes */
389
389
onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
390
390
onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
391
391
onyx_images[TLBHANDMISS][17] = itlb_addr;
392
392
393
393
/* clear last 2 bytes */
394
-
onyx_images[BIG_CPI][15] &= 0xffffff00;
394
+
onyx_images[BIG_CPI][15] &= 0xffffff00;
395
395
/* set 2 bytes */
396
396
onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
397
397
onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
···
404
404
405
405
} else if (perf_processor_interface == CUDA_INTF) {
406
406
/* Cuda interface */
407
-
cuda_images[TLBMISS][16] =
407
+
cuda_images[TLBMISS][16] =
408
408
(cuda_images[TLBMISS][16]&0xffff0000) |
409
409
((dtlb_addr >> 8)&0x0000ffff);
410
-
cuda_images[TLBMISS][17] =
410
+
cuda_images[TLBMISS][17] =
411
411
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
412
412
cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
413
413
414
-
cuda_images[TLBHANDMISS][16] =
414
+
cuda_images[TLBHANDMISS][16] =
415
415
(cuda_images[TLBHANDMISS][16]&0xffff0000) |
416
416
((dtlb_addr >> 8)&0x0000ffff);
417
-
cuda_images[TLBHANDMISS][17] =
417
+
cuda_images[TLBHANDMISS][17] =
418
418
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
419
419
cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
420
420
421
-
cuda_images[BIG_CPI][16] =
421
+
cuda_images[BIG_CPI][16] =
422
422
(cuda_images[BIG_CPI][16]&0xffff0000) |
423
423
((dtlb_addr >> 8)&0x0000ffff);
424
-
cuda_images[BIG_CPI][17] =
424
+
cuda_images[BIG_CPI][17] =
425
425
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
426
426
cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
427
427
} else {
···
433
433
434
434
/*
435
435
* ioctl routine
436
-
* All routines effect the processor that they are executed on. Thus you
436
+
* All routines affect the processor that they are executed on. Thus you
437
437
* must be running on the processor that you wish to change.
438
438
*/
439
439
···
459
459
}
460
460
461
461
/* copy out the Counters */
462
-
if (copy_to_user((void __user *)arg, raddr,
462
+
if (copy_to_user((void __user *)arg, raddr,
463
463
sizeof (raddr)) != 0) {
464
464
error = -EFAULT;
465
465
break;
···
487
487
.open = perf_open,
488
488
.release = perf_release
489
489
};
490
-
490
+
491
491
static struct miscdevice perf_dev = {
492
492
MISC_DYNAMIC_MINOR,
493
493
PA_PERF_DEV,
···
595
595
/* OR sticky2 (bit 1496) to counter2 bit 32 */
596
596
tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
597
597
raddr[2] = (uint32_t)tmp64;
598
-
598
+
599
599
/* Counter3 is bits 1497 to 1528 */
600
600
tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff;
601
601
/* OR sticky3 (bit 1529) to counter3 bit 32 */
···
617
617
userbuf[22] = 0;
618
618
userbuf[23] = 0;
619
619
620
-
/*
620
+
/*
621
621
* Write back the zeroed bytes + the image given
622
622
* the read was destructive.
623
623
*/
···
625
625
} else {
626
626
627
627
/*
628
-
* Read RDR-15 which contains the counters and sticky bits
628
+
* Read RDR-15 which contains the counters and sticky bits
629
629
*/
630
630
if (!perf_rdr_read_ubuf(15, userbuf)) {
631
631
return -13;
632
632
}
633
633
634
-
/*
634
+
/*
635
635
* Clear out the counters
636
636
*/
637
637
perf_rdr_clear(15);
···
644
644
raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
645
645
raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
646
646
}
647
-
647
+
648
648
return 0;
649
649
}
650
650
···
682
682
i = tentry->num_words;
683
683
while (i--) {
684
684
buffer[i] = 0;
685
-
}
685
+
}
686
686
687
687
/* Check whether the bit width is a whole multiple of 64 */
688
688
if ((xbits = width & 0x03f) != 0) {
···
808
808
}
809
809
810
810
runway = ioremap_nocache(cpu_device->hpa.start, 4096);
811
+
if (!runway) {
812
+
pr_err("perf_write_image: ioremap failed!\n");
813
+
return -ENOMEM;
814
+
}
811
815
812
816
/* Merge intrigue bits into Runway STATUS 0 */
813
817
tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
814
-
__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
818
+
__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
815
819
runway + RUNWAY_STATUS);
816
-
820
+
817
821
/* Write RUNWAY DEBUG registers */
818
822
for (i = 0; i < 8; i++) {
819
823
__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
820
824
}
821
825
822
-
return 0;
826
+
return 0;
823
827
}
824
828
825
829
/*
···
847
843
perf_rdr_shift_out_U(rdr_num, buffer[i]);
848
844
} else {
849
845
perf_rdr_shift_out_W(rdr_num, buffer[i]);
850
-
}
846
+
}
851
847
}
852
848
printk("perf_rdr_write done\n");
853
849
}