Skip to content

Commit

Permalink
new approach to client side req proc seems to go 1 stripe at a time s…
Browse files Browse the repository at this point in the history
…o gather all stripes
  • Loading branch information
Dennis Dalessandro committed Jan 29, 2008
1 parent 524893b commit 05174d6
Showing 1 changed file with 139 additions and 98 deletions.
237 changes: 139 additions & 98 deletions src/client/sysint/sys-osd-io.sm
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ static int osd_io_init(struct PINT_smcb *smcb, job_status_s *js_p)
PVFS_object_attr *attr = &sm_p->getattr.attr;
int i, ret;


ret = PINT_msgpairarray_init(&sm_p->msgarray, io->datafile_count);
if (ret)
goto out;
Expand Down Expand Up @@ -179,65 +178,49 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)
struct osd_command *command;
struct bsg_iovec *iov;

/* temp space for output from request processing */
/* clients offset and length pairs */
PVFS_offset offseta[OSD_INIT_MAX_IOVEC];
PVFS_size sizea[OSD_INIT_MAX_IOVEC];
int csegs_count;
PVFS_size cagg_len;

/* temporary space for querying offset length pairs */
PVFS_offset temp_offset[OSD_INIT_MAX_IOVEC];
PVFS_size temp_size[OSD_INIT_MAX_IOVEC];

/* output from running server's view of request processing */
/* targets offset and length pairs */
PVFS_offset target_offset[OSD_INIT_MAX_IOVEC];
PVFS_size target_size[OSD_INIT_MAX_IOVEC];

/* total bytes to write across all io servers */
PVFS_size sbytemax = io->mem_req->aggregate_size;
/* kernel SCSI layer can only handle so many pages at once */
if (sbytemax > KERNEL_BUFSIZE)
sbytemax = KERNEL_BUFSIZE;

PINT_Request_result sresult = {
.offset_array = target_offset,
.size_array = target_size,
.segmax = ARRAY_SIZE(target_offset),
};
PINT_Request_result cresult = {
.offset_array = offseta,
.size_array = sizea,
.segmax = ARRAY_SIZE(offseta),
};
PVFS_size this_aggregate_size = 0;

/* structures to pass into req proc engine */
PINT_Request_result sresult, cresult;

/*
* Cannot send the distribution to the server on OSDs, unlike with
* normal PVFS servers. So we have to generate file-contiguous
* chunks for each server. We do this in two steps, suggested by Phil:
* 1. run server-side processing with segmax = 1, to generate the
* number of bytes in the contiguous segment.
* 2. run client-side processing with bytemax = what we got from step
* 1. Set segmax to the client-side max.
* Iterate.
* One flaw with this approach is that looking for a single segment
* on the server will not cross distribution chunk boundaries, i.e.
* if you have a contiguous memory region that gets simply striped at
* 64k onto two servers, each write will be exactly 64k, although you
* could have done a gather iovec on the client to write all of each
* servers' chunks in one go, as they end up being contig in file on
* each respective server.
*
* Instead of following the above, the new plan is to:
* 1. run server-side processing with segmax = many to generate as much
* as possible for that server
* 2. run client-side processing with same bytemax as 1 to get multiple
* segments.
* 3. build an OSD CDB with SGL headers and set options byte appropriately
* 4. since we have to process each segment we can watch to see if they are
* the same size/stride for our optimized case
*/

for (i=0; i<sm_p->u.io.datafile_count; i++) {

/* kernel SCSI layer can only handle so many pages at once */
if (sbytemax > KERNEL_BUFSIZE) {
gossip_err("%s: Kernel SCSI buffer too small\n", __func__);
ret = -ENOMEM;
goto out;
}

/* for each datafile/io server/osd write ALL of its data */
for (i = 0; i < sm_p->u.io.datafile_count; i++) {
uint8_t *p;
uint32_t len;
PVFS_offset cur_offset;
PVFS_handle datafile_handle = attr->u.meta.dfile_array[
io->datafile_index_array[i]];
PVFS_handle datafile_handle;
int j;

datafile_handle = attr->u.meta.dfile_array[io->datafile_index_array[i]];

if (PINT_REQUEST_DONE(&io->file_req_state[i]) || io->short_read[i]) {
gossip_debug(GOSSIP_IO_DEBUG, "%s: Nothing to do for server %d\n",
__func__, i);
sm_p->msgarray[i].suppress = 1; /* disable this entry */
continue;
}
Expand All @@ -248,47 +231,99 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)
cur_offset = io->file_req_state[i].target_offset;

gossip_debug(GOSSIP_IO_DEBUG,
"%s: %d: frs type %lld target %lld final %lld cur %lld\n",
"%s: %d: frs type %lld target %lld final %lld cur %lld bytemax %lld\n",
__func__, i,
lld(io->file_req_state[i].type_offset),
lld(io->file_req_state[i].target_offset),
lld(io->file_req_state[i].final_offset), lld(cur_offset));
lld(io->file_req_state[i].final_offset),
lld(cur_offset),
lld(sbytemax));

/*
* Used to: Run the server side to find a single segment. All we care
* about is the resulting size. --- Now: get all segments
*/
sresult.segs = 0;
PINT_REQUEST_STATE_RST(io->temp_req_state);

/* */
/* figure out the server's offset/lengths; should get them all in one go */
/* */
sresult.segs = 0;
sresult.bytes = 0;
sresult.bytemax = sbytemax;
PINT_REQUEST_STATE_RST(io->temp_req_state);
io->temp_req_state->target_offset = cur_offset;
sresult.bytemax = sbytemax; /* request full size */
sresult.offset_array = target_offset;
sresult.size_array = target_size;
sresult.segmax = OSD_INIT_MAX_IOVEC;

/* set up the io req state to pass in */
io->temp_req_state->target_offset = cur_offset; /*may or may not be 0 */
io->temp_req_state->final_offset = io->file_req_state[i].final_offset;
ret = PINT_process_request(io->temp_req_state, NULL, &io->file_data[i],

ret = PINT_process_request(io->temp_req_state, NULL, &io->file_data[i],
&sresult, PINT_SERVER);
if (ret) {
gossip_err("%s: server %d process failed\n", __func__, i);
gossip_err("%s: server %d process_request call failed\n",
__func__, i);
goto out;
}
this_aggregate_size += sresult.bytes;

#if 0
printf("SERVER INFO [%d]:\n", i);
printf("Total size %lld \n", lld(sresult.bytes));
for (j = 0; j < sresult.segs; j++) {
printf("offset= %lld length= %lld\n", lld(target_offset[j]),
lld(target_size[j]));
}
#endif

/* */
/* figure out the client's offset/lengths one stripe at a time */
/* */
csegs_count = 0;
cagg_len = 0;

while (cagg_len != sresult.bytes) {
cresult.segs = 0;
cresult.bytes = 0;
cresult.bytemax = sresult.bytes;
cresult.offset_array = temp_offset;
cresult.size_array = temp_size;
cresult.segmax = OSD_INIT_MAX_IOVEC - csegs_count;

ret = PINT_process_request(&io->file_req_state[i],
&io->mem_req_state[i], &io->file_data[i],
&cresult, PINT_CLIENT);
if (ret) {
gossip_err("%s: client %d process_request call failed\n",
__func__, i);
goto out;
}

/*
* Run the client side to fill the segments.
*/
cresult.segs = 0;
cresult.bytes = 0;
cresult.bytemax = sresult.bytes;
ret = PINT_process_request(&io->file_req_state[i],
&io->mem_req_state[i], &io->file_data[i],
&cresult, PINT_CLIENT);
if (ret) {
gossip_err("%s: client %d process failed\n", __func__, i);
goto out;
}
/* now move the results for this stripe to the permanent array */
for (j = 0; j < cresult.segs; j++) {
offseta[csegs_count + j] = temp_offset[j];
sizea[csegs_count + j] = temp_size[j];
}
csegs_count += cresult.segs;
cagg_len += cresult.bytes;

if(cagg_len > sresult.bytes) {
gossip_err("%s: Client Agg len too big\n", __func__);
ret = -EINVAL;
goto out;
}
}

#if 0
printf("CLIENT INFO [%d]:\n", i);
printf("Total size %lld\n", lld(cagg_len));
for( j = 0; j < csegs_count; j++ ) {
printf("offset= %lld length= %lld\n", lld(offseta[j]),
lld(sizea[j]));
}
#endif

gossip_debug(GOSSIP_IO_DEBUG, "%s: %d: %d Server Segments, %lld bytes\n",
__func__, i, sresult.segs, lld(sresult.bytes));
gossip_debug(GOSSIP_IO_DEBUG, "%s: %d: %d Client Segments, %lld bytes\n",
__func__, i, cresult.segs, lld(cresult.bytes));
__func__, i, csegs_count, lld(cagg_len));


command = &sm_p->msgarray[i].osd_command;
Expand All @@ -300,21 +335,22 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)
p = io->buffer;

if (sresult.segs == 1) { /* contiguous server buff to write to */
if (cresult.segs == 1) {
if (csegs_count == 1) {
p += offseta[0];
len = sizea[0];
cresult.segs = 0; /* do not build a 1-unit iovec */
} else if (cresult.segs > 1) {
iov = malloc(cresult.segs * sizeof(*iov));
csegs_count = 0; /* do not build a 1-unit iovec */
} else if (csegs_count > 1) {
iov = malloc(csegs_count * sizeof(*iov));
if (iov == NULL) {
ret = -ENOMEM;
goto out;
}
len = 0;
for (i=0; i<cresult.segs; i++) {
iov[i].iov_base = (uintptr_t) (p + offseta[i]);
iov[i].iov_len = sizea[i];
len += sizea[i];
int j;
for (j=0; j<csegs_count; j++) {
iov[j].iov_base = (uintptr_t) (p + offseta[j]);
iov[j].iov_len = sizea[j];
len += sizea[j];
}
p = (void *) iov;
sm_p->msgarray[i].osd_iov = p; /* free IOV later */
Expand All @@ -329,14 +365,14 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)
len, 0);
command->indata = p;
command->inlen_alloc = len;
command->iov_inlen = cresult.segs;
command->iov_inlen = csegs_count;

} else if (io->io_type == PVFS_IO_WRITE) {
osd_command_set_write(command, PVFS_OSD_DATA_PID, datafile_handle,
len, 0);
command->outdata = p;
command->outlen = len;
command->iov_outlen = cresult.segs;
command->iov_outlen = csegs_count;
}

} else { /* either need a SGL or optimized SGL */
Expand Down Expand Up @@ -374,11 +410,11 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)
total_len += segl;
}

//flag=0; /* force SGL mode always so we can compare to VEC later */
//~ flag=0; /* force SGL mode always so we can compare to VEC later */
/* move this into a config file or something -- remove eventually*/

if (io->io_type == PVFS_IO_WRITE) {
iov = malloc((cresult.segs + 1) * sizeof(*iov));
iov = malloc((csegs_count + 1) * sizeof(*iov));
if (iov == NULL) {
ret = -ENOMEM;
goto out;
Expand Down Expand Up @@ -427,7 +463,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)

iov[0].iov_base = (uintptr_t)sgl;
iov[0].iov_len = ddt_size;
for (j=1; j<=cresult.segs; j++) {
for (j=1; j<=csegs_count; j++) {
iov[j].iov_base = (uintptr_t)(p + offseta[j-1]);
iov[j].iov_len = sizea[j-1];
}
Expand All @@ -445,28 +481,28 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)
len, 0);
command->outdata = p;
command->outlen = len;
command->iov_outlen = cresult.segs+1;
command->iov_outlen = csegs_count+1;

if (flag)
osd_command_set_ddt(command, DDT_VEC);
else
osd_command_set_ddt(command, DDT_SGL);

} else if (io->io_type == PVFS_IO_READ) {
if (cresult.segs == 1) {
if (csegs_count == 1) {
p += offseta[0];
len = sizea[0];
cresult.segs = 0; /* do not build a 1-unit iovec */
} else if (cresult.segs > 1) {
iov = malloc(cresult.segs * sizeof(*iov));
csegs_count = 0; /* do not build a 1-unit iovec */
} else if (csegs_count > 1) {
iov = malloc(csegs_count * sizeof(*iov));
if (iov == NULL) {
ret = -ENOMEM;
goto out;
}
len = 0;
sm_p->msgarray[i].osd_iov = iov; /* free sgl later */

for (i=0; i<cresult.segs; i++) {
for (i=0; i<csegs_count; i++) {
iov[i].iov_base = (uintptr_t) (p + offseta[i]);
iov[i].iov_len = sizea[i];
len += sizea[i];
Expand All @@ -478,7 +514,7 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)
len, 0);
command->indata = p;
command->inlen_alloc = len;
command->iov_inlen = cresult.segs;
command->iov_inlen = csegs_count;

len = 0;

Expand Down Expand Up @@ -535,12 +571,21 @@ static int osd_io_setup_msgpairs(struct PINT_smcb *smcb, job_status_s *js_p)

}


}




gossip_debug(GOSSIP_IO_DEBUG, "%s: Aggreagte Written: %lld\n", __func__,
lld(this_aggregate_size));

if (this_aggregate_size != sbytemax) {
gossip_err("%s: Did not get correct amount of data\n", __func__);
ret = -EINVAL;
}

out:
js_p->error_code = ret;
js_p->error_code = ret;
return 1;
}

Expand All @@ -563,7 +608,6 @@ static int osd_io_completion_fn(




/* fixup short read error */
if (io->io_type == PVFS_IO_READ) {
if (command->status == SAM_STAT_CHECK_CONDITION) {
Expand Down Expand Up @@ -653,7 +697,6 @@ static int osd_io_maybe_xfer_more(struct PINT_smcb *smcb, job_status_s *js_p)
/* only need to look at the first one */
//~ struct osd_command *command = &sm_p->msgarray[0].osd_command;


gossip_debug(GOSSIP_IO_DEBUG, "%s: total %lld want %lld.\n", __func__,
lld(io->total_size), lld(io->mem_req->aggregate_size));

Expand Down Expand Up @@ -710,7 +753,6 @@ static int osd_io_analyze_results(struct PINT_smcb *smcb, job_status_s *js_p)
struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
PVFS_offset eor, filereq_ub_offset;
int ret = 0;

gossip_debug(GOSSIP_IO_DEBUG, "%s: total bytes transferred %lld\n",
__func__, lld(sm_p->u.io.total_size));

Expand Down Expand Up @@ -768,7 +810,6 @@ static int osd_io_cleanup(struct PINT_smcb *smcb, job_status_s *js_p)
{
struct PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
struct PINT_client_io_sm *io = &sm_p->u.io;

free(sm_p->msgarray);
sm_p->msgarray = NULL;
sm_p->msgarray_count = 0;
Expand Down

0 comments on commit 05174d6

Please sign in to comment.