Reducing Checkpoint Overhead in Grid Environment
Cited by
Export citation
- BibTex
- RIS
- TXT
@comment{Faki and Jimoh, Reducing Checkpoint Overhead in Grid Environment, JICS 12(4). No doi field is recorded: the exported value was only the resolver prefix with no identifier.}
@Article{JICS-12-243,
  author   = {Faki, A. S. and Jimoh, R. G.},
  title    = {Reducing Checkpoint Overhead in Grid Environment},
  journal  = {Journal of Information and Computing Science},
  year     = {2024},
  volume   = {12},
  number   = {4},
  pages    = {243--247},
  abstract = {Grid Computing has become major player in super-computing community. But due to the
diversity and disruptive nature of its resources, failure of jobs is not an exception. However, many
researchers have come up with models that enhance jobs survivability. Popular among this model is
checkpoint model which have the ability of saving already computed jobs on a stable secured storage. This
model avoids re-computing of already computed jobs from the scratch in case of resources failure. But the
time a job takes in checkpointing also becomes another task which adds overheads to computing resources
thereby reducing the resources performance. In order not to add too many overheads to computing resources,
the number of checkpoints must be minimized. This study proposed checkpoint interval models which is
implemented based on fault index history of computing resources. Failed jobs are re-allocated from their last
saved checkpoint using an exception handler. The study observed that arithmetic checkpoint model is better
used when fault index of computing resources is high while geometric checkpoint model is better when fault
index of resources is low.},
  issn     = {1746-7659},
  url      = {http://global-sci.org/intro/article_detail/jics/22466.html}
}
TY - JOUR
T1 - Reducing Checkpoint Overhead in Grid Environment
AU - A. S. Faki
AU - R. G. Jimoh
JO - Journal of Information and Computing Science
VL - 12
IS - 4
SP - 243
EP - 247
PY - 2024
DA - 2024/01
SN - 1746-7659
DO - http://doi.org/
UR - https://global-sci.org/intro/article_detail/jics/22466.html
KW - Arithmetic Checkpoint, Exception Handler, Fault Tolerance, Geometric Checkpoint
AB - Grid Computing has become major player in super-computing community. But due to the
diversity and disruptive nature of its resources, failure of jobs is not an exception. However, many
researchers have come up with models that enhance jobs survivability. Popular among this model is
checkpoint model which have the ability of saving already computed jobs on a stable secured storage. This
model avoids re-computing of already computed jobs from the scratch in case of resources failure. But the
time a job takes in checkpointing also becomes another task which adds overheads to computing resources
thereby reducing the resources performance. In order not to add too many overheads to computing resources,
the number of checkpoints must be minimized. This study proposed checkpoint interval models which is
implemented based on fault index history of computing resources. Failed jobs are re-allocated from their last
saved checkpoint using an exception handler. The study observed that arithmetic checkpoint model is better
used when fault index of computing resources is high while geometric checkpoint model is better when fault
index of resources is low.
A. S. Faki and R. G. Jimoh. (2024). Reducing Checkpoint Overhead in Grid Environment.
Journal of Information and Computing Science. 12 (4).
243-247.
doi:
Copy to clipboard