Address high IOPs usage of the Gnocchi Ceph pool #1381

Merged
48 changes: 42 additions & 6 deletions gnocchi/storage/ceph.py
@@ -15,6 +15,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 import collections
+import daiquiri
 
 from oslo_config import cfg
 
@@ -42,6 +43,11 @@
 
 rados = ceph.rados
 
+LOG = daiquiri.getLogger(__name__)
+
+DEFAULT_RADOS_BUFFER_SIZE = 8192
+MAP_UNAGGREGATED_METRIC_NAME_BY_SIZE = {}
+
 
 class CephStorage(storage.StorageDriver):
     WRITE_FULL = False
@@ -88,6 +94,13 @@ def _store_metric_splits(self, metrics_keys_aggregations_data_offset,
             for key, agg, data, offset in keys_aggregations_data_offset:
                 name = self._get_object_name(
                     metric, key, agg.method, version)
+                metric_size = len(data)
+
+                if metric_size > DEFAULT_RADOS_BUFFER_SIZE:
Contributor

Shouldn't we keep the old metric_size if it is greater than the new one? It could reduce some problems related to volatile object sizes (which increase and decrease constantly).

If the objects are constantly growing and never get smaller, maybe use an approach like "if the new size is greater than the current buffer size, set the new buffer size to twice the new size"; it should reduce some unnecessary reads if the rados object always gets bigger (this policy is sketched just after this thread).

It is just a suggestion; the overall code seems pretty good to me. Good work.

Contributor Author

That is a good point. We have not seen the case of objects reducing in size and never growing back. Normally, they grow up to a certain size, once the back-window is saturated, and never go beyond that. That is why we use the exact value of the length rather than some other technique, such as using bigger numbers and so on.

In other words, once we reach the maximum RADOS object size allowed by the back-window limit, the object maintains that size, because the truncate is only executed when new datapoints are received: one new point comes in, and one old point is deleted.
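
For reference, here is a minimal sketch of the sizing policy the reviewer describes (the helper name `next_buffer_size` is hypothetical and not part of this patch): never shrink the cached size, and double it when the object outgrows it, so volatile or steadily growing objects trigger fewer cache updates and fewer short reads.

```python
DEFAULT_RADOS_BUFFER_SIZE = 8192


def next_buffer_size(cached_size, observed_size):
    # Never shrink the cached buffer size; when the object has grown
    # past it, jump to twice the observed size so a steadily growing
    # object does not force a cache update on every write.
    current = max(cached_size, DEFAULT_RADOS_BUFFER_SIZE)
    if observed_size > current:
        return 2 * observed_size
    return current


# A volatile object that grows, shrinks, then grows again:
size = DEFAULT_RADOS_BUFFER_SIZE
for observed in (4096, 20000, 12000, 30000):
    size = next_buffer_size(size, observed)
    print(observed, "->", size)  # 8192, 40000, 40000, 40000
```

The patch instead caches the exact length, which matches the author's observation that objects plateau once the back-window is saturated.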

+                    MAP_UNAGGREGATED_METRIC_NAME_BY_SIZE[name] = metric_size
+                    LOG.debug(
+                        "Storing time series size [%s] for metric [%s].",
+                        metric_size, name)
                 if offset is None:
                     self.ioctx.write_full(name, data)
                 else:
@@ -153,7 +166,14 @@ def _get_splits_unbatched(self, metric, key, aggregation, version=3):
         try:
             name = self._get_object_name(
                 metric, key, aggregation.method, version)
-            return self._get_object_content(name)
+
+            metric_size = MAP_UNAGGREGATED_METRIC_NAME_BY_SIZE.get(
+                name, DEFAULT_RADOS_BUFFER_SIZE)
+
+            LOG.debug("Reading metric [%s] with buffer size of [%s].",
+                      name, metric_size)
+
+            return self._get_object_content(name, buffer_size=metric_size)
         except rados.ObjectNotFound:
             return
 
@@ -206,9 +226,16 @@ def _build_unaggregated_timeserie_path(metric, version):
 
     def _get_or_create_unaggregated_timeseries_unbatched(
             self, metric, version=3):
+        metric_name = self._build_unaggregated_timeserie_path(metric, version)
+        metric_size = MAP_UNAGGREGATED_METRIC_NAME_BY_SIZE.get(
+            metric_name, DEFAULT_RADOS_BUFFER_SIZE)
+
+        LOG.debug("Reading unaggregated metric [%s] with buffer size of [%s].",
+                  metric_name, metric_size)
+
         try:
             contents = self._get_object_content(
-                self._build_unaggregated_timeserie_path(metric, version))
+                metric_name, buffer_size=metric_size)
         except rados.ObjectNotFound:
             self._create_metric(metric)
         else:
@@ -218,14 +245,23 @@ def _get_or_create_unaggregated_timeseries_unbatched(
 
     def _store_unaggregated_timeseries_unbatched(
             self, metric, data, version=3):
-        self.ioctx.write_full(
-            self._build_unaggregated_timeserie_path(metric, version), data)
+        metric_name = self._build_unaggregated_timeserie_path(metric, version)
+        metric_size = len(data)
+
+        if metric_size > DEFAULT_RADOS_BUFFER_SIZE:
+            MAP_UNAGGREGATED_METRIC_NAME_BY_SIZE[metric_name] = metric_size
+            LOG.debug(
+                "Storing unaggregated time series size [%s] for metric [%s]",
+                metric_size, metric_name)
+        self.ioctx.write_full(metric_name, data)
 
-    def _get_object_content(self, name):
+    def _get_object_content(self, name, buffer_size=DEFAULT_RADOS_BUFFER_SIZE):
         offset = 0
         content = b''
 
         while True:
-            data = self.ioctx.read(name, offset=offset)
+            data = self.ioctx.read(name, length=buffer_size, offset=offset)
             if not data:
                 break
             content += data
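
To make the IOPS effect concrete, here is a small self-contained sketch (not part of the patch; `FakeIoctx` is a hypothetical stand-in for a rados ioctx) that mirrors the shape of the chunked read loop in `_get_object_content` and counts read round trips:

```python
class FakeIoctx:
    """Stand-in for a rados ioctx that counts read round trips."""

    def __init__(self, obj):
        self.obj = obj
        self.reads = 0

    def read(self, name, length, offset=0):
        self.reads += 1
        return self.obj[offset:offset + length]


def get_object_content(ioctx, name, buffer_size):
    # Mirrors the shape of CephStorage._get_object_content after this
    # change: read buffer_size bytes at a time until an empty read.
    offset = 0
    content = b''
    while True:
        data = ioctx.read(name, length=buffer_size, offset=offset)
        if not data:
            break
        content += data
        offset += len(data)
    return content


blob = b'x' * 1_000_000          # a 1 MB unaggregated time series object

ioctx = FakeIoctx(blob)
get_object_content(ioctx, 'metric', buffer_size=8192)
print(ioctx.reads)               # 124 reads with the 8 KiB default

ioctx = FakeIoctx(blob)
get_object_content(ioctx, 'metric', buffer_size=len(blob))
print(ioctx.reads)               # 2 reads with the cached exact size
```

With the 8 KiB default, a large unaggregated object costs over a hundred read operations per fetch, while the cached exact size brings it down to two (one data read plus the empty read that terminates the loop) — which is where the reduction in Ceph pool IOPS claimed by this PR comes from.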