Dot Product
| dot_ppu.c
|
|
#include <stdio.h>
#include <libspe2.h>
#include <pthread.h>
/* Per-thread argument bundle: every field is forwarded to spe_context_run()
 * by ppu_pthread_function(). */
typedef struct ppu_pthread_data {
spe_context_ptr_t context; /* SPE context the thread will run */
unsigned int entry; /* entry point; passed by address to spe_context_run() */
void *argp; /* forwarded as the argp argument of spe_context_run() */
void *envp; /* forwarded as the envp argument of spe_context_run() */
} ppu_pthread_data_t;
/*
 * Thread start routine: runs the SPE program attached to one SPE context.
 *
 * arg  - pointer to a ppu_pthread_data_t describing the context, entry point
 *        and the argp/envp values handed to the SPE program. The pointed-to
 *        object must stay alive until the thread has finished, because
 *        spe_context_run() updates data->entry in place.
 *
 * The thread always terminates with a NULL exit value; a failed SPE run is
 * reported on stderr instead of being silently ignored.
 */
void *ppu_pthread_function(void *arg) {
    ppu_pthread_data_t *data = arg;

    /* spe_context_run() returns a negative value and sets errno on error. */
    if (spe_context_run(data->context, &data->entry, 0,
                        data->argp, data->envp, NULL) < 0) {
        perror("spe_context_run");
    }

    pthread_exit(NULL);
}
/* Control block whose address is passed to every SPE program as argp.
 * The layout must match the MYSTRUCT declared in the SPU source. */
typedef struct {
float *X; /* first input vector */
float *Y; /* second input vector */
float *Z; /* output vector, filled in by the SPEs */
/* Padding. NOTE(review): presumably sized so the struct is 128 bytes with
 * 4-byte pointers (3 * 4 + 116) — confirm for the target ABI. */
char dummy[116];
} MYSTRUCT;
/* SPE program handle defined outside this translation unit.
 * NOTE(review): presumably the embedded image built from dot_spu.c — confirm
 * against the build. */
extern spe_program_handle_t dot_spu;
int main(void) {
float X[256] __attribute__((aligned(128)));
float Y[256] __attribute__((aligned(128)));
float Z[256] __attribute__((aligned(128)));
unsigned int i;
for (i=0;i<256;i++) { X[i] = i * 2; Y[i] = i * 2 + 1; }
MYSTRUCT mystruct __attribute__((aligned(128)));
mystruct.X = X;
mystruct.Y = Y;
mystruct.Z = Z;
ppu_pthread_data_t ptdata[8];
pthread_t pthread[8];
for (i=0;i<8;i++) {
ptdata[i].context = spe_context_create(0,NULL);
spe_program_load(ptdata[i].context,&dma_spu);
ptdata[i].entry = SPE_DEFAULT_ENTRY;
ptdata[i].argp = (void *) &mystruct;
ptdata[i].envp = (void *) 128;
pthread_create(&pthread[i],NULL,&ppu_pthread_function,&ptdata[i]);
spe_in_mbox_write(ptdata[i].context,&i,1,SPE_MBOX_ANY_NONBLOCKING);
}
for (i=0;i<8;i++) {
pthread_join(pthread[i],NULL);
spe_context_destroy(ptdata[i].context);
}
// compute dot product
float sum = 0.;
for (i=0;i<256;i++) sum += Z[i];
printf("Dot product ! %10.1f\n",sum);
return 0;
}
|
| dot_spu.c
|
|
#include<stdio.h>
#include<spu_mfcio.h>
/* Local copy of the control block that main() fetches from main memory by DMA.
 * The layout must match the MYSTRUCT declared in the PPU source. */
typedef struct {
float *X; /* base of the first input vector; used as a DMA source address */
float *Y; /* base of the second input vector; used as a DMA source address */
float *Z; /* base of the output vector; used as a DMA destination address */
/* Padding. NOTE(review): presumably sized so the struct is 128 bytes with
 * 4-byte pointers (3 * 4 + 116) — confirm for the target ABI. */
char dummy[116];
} MYSTRUCT;
/*
 * SPU side of the dot-product example.
 *
 * speid - SPE identifier supplied by the runtime (unused here)
 * argp  - effective address of the MYSTRUCT control block in main memory
 * envp  - size in bytes of that control block, used as the DMA length
 *
 * Reads this SPU's index from the inbound mailbox, fetches the control block
 * and then its own 32-float slice of X and Y, multiplies them element-wise,
 * and writes the 32 products to the matching slice of Z in main memory.
 *
 * Always returns 0.
 */
int main(unsigned long long speid, unsigned long long argp, unsigned long long envp) {
    enum {
        FLOATS_PER_SPU = 32,                            /* elements handled by one SPU */
        SLICE_BYTES = FLOATS_PER_SPU * sizeof(float)    /* 128-byte DMA per vector */
    };

    MYSTRUCT mystruct __attribute__((aligned(128)));
    float X[FLOATS_PER_SPU] __attribute__((aligned(128)));
    float Y[FLOATS_PER_SPU] __attribute__((aligned(128)));
    float Z[FLOATS_PER_SPU] __attribute__((aligned(128)));

    (void) speid;

    /* Waits until the PPU has posted this SPU's index. */
    unsigned int spu_id = spu_read_in_mbox();

    /* A single tag group is used for every transfer in this program. */
    int tag = 1;
    int tag_mask = 1 << tag;

    /* Phase 1: fetch the control block holding the three base addresses. */
    mfc_get(&mystruct, (unsigned int) argp, envp, tag, 0, 0);
    mfc_write_tag_mask(tag_mask);
    mfc_read_tag_status_all();

    /* Phase 2: fetch this SPU's slice of both input vectors. */
    unsigned int offset = spu_id * SLICE_BYTES;
    mfc_get(X, (unsigned long int) mystruct.X + offset, SLICE_BYTES, tag, 0, 0);
    mfc_get(Y, (unsigned long int) mystruct.Y + offset, SLICE_BYTES, tag, 0, 0);
    mfc_read_tag_status_all();

    // compute vector Z = X * Y
    int i;
    for (i = 0; i < FLOATS_PER_SPU; i++) {
        Z[i] = X[i] * Y[i];
    }

    // transfer data to PPU using DMA
    mfc_put(Z, (unsigned long int) mystruct.Z + offset, SLICE_BYTES, tag, 0, 0);
    mfc_read_tag_status_all();

    /* spu_id is unsigned, so %u is the matching conversion (was %i). */
    printf("End of SPU %u thread\n", spu_id);
    return 0;
}
|
|
| Dot Product
|
The dot product of two vectors :
- a = [a1, a2, … , an]
- b = [b1, b2, … , bn]
is by definition :

$$a \cdot b = \sum_{i=1}^{n} a_i b_i = a_1 b_1 + a_2 b_2 + \dots + a_n b_n$$
|
| read more on Wikipedia ...
|
| DMA
|
(void) mfc_put (volatile void *lsa, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid)
- lsa : local store address
- ea : effective address in main memory
- size : DMA transfer size
- tag : DMA tag identifier
- tid : transfer class identifier
- rid : replacement class identifier
|
|