Skip to content

Commit

Permalink
Merge pull request #760 from deniszh/backport/1.1.x/pr-743_pr-743_pr-…
Browse files Browse the repository at this point in the history
…743_pr-743_pr-753

[1.1.x] support for storing tagged series in hashed filenames | codacy fix | move any existing non-hashed files to hashed names if needed | codacy fix | Adding dummy.txt to storage dirs
  • Loading branch information
deniszh authored Apr 4, 2018
2 parents 64f9e3f + 0ae9794 commit af2d575
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 23 deletions.
3 changes: 3 additions & 0 deletions conf/carbon.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,9 @@ WHISPER_FALLOCATE_CREATE = True
# an update to the tag index, the default setting is once every 100 updates
# TAG_UPDATE_INTERVAL = 100

# Tag hash filenames, this specifies whether tagged metric filenames should use the hash of the metric name
# or a human-readable name. Using hashed names avoids issues with path length when using a large number of tags.
# TAG_HASH_FILENAMES = True

# Tag batch size, this specifies the maximum number of series to be sent to graphite-web in a single batch
# TAG_BATCH_SIZE = 100
Expand Down
1 change: 1 addition & 0 deletions lib/carbon/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
TAG_UPDATE_INTERVAL=100,
TAG_BATCH_SIZE=100,
TAG_QUEUE_SIZE=10000,
TAG_HASH_FILENAMES=True,
ENABLE_MANHOLE=False,
MANHOLE_INTERFACE='127.0.0.1',
MANHOLE_PORT=7222,
Expand Down
45 changes: 37 additions & 8 deletions lib/carbon/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def __init__(self, settings):
super(WhisperDatabase, self).__init__(settings)

self.data_dir = settings.LOCAL_DATA_DIR
self.tag_hash_filenames = settings.TAG_HASH_FILENAMES
self.sparse_create = settings.WHISPER_SPARSE_CREATE
self.fallocate_create = settings.WHISPER_FALLOCATE_CREATE
if settings.WHISPER_AUTOFLUSH:
Expand Down Expand Up @@ -123,7 +124,13 @@ def write(self, metric, datapoints):
whisper.update_many(path, datapoints)

def exists(self, metric):
return exists(self.getFilesystemPath(metric))
if exists(self.getFilesystemPath(metric)):
return True
# if we're using hashed filenames and a non-hashed file exists then move it to the new name
if self.tag_hash_filenames and exists(self._getFilesystemPath(metric, False)):
os.rename(self._getFilesystemPath(metric, False), self.getFilesystemPath(metric))
return True
return False

def create(self, metric, retentions, xfilesfactor, aggregation_method):
path = self.getFilesystemPath(metric)
Expand Down Expand Up @@ -152,7 +159,13 @@ def setMetadata(self, metric, key, value):
return whisper.setAggregationMethod(wsp_path, value)

def getFilesystemPath(self, metric):
return join(self.data_dir, TaggedSeries.encode(metric, sep) + '.wsp')
return self._getFilesystemPath(metric, self.tag_hash_filenames)

def _getFilesystemPath(self, metric, tag_hash_filenames):
    """Build the ``.wsp`` path for ``metric`` below ``self.data_dir``.

    ``tag_hash_filenames`` is passed to ``TaggedSeries.encode`` as
    ``hash_only``, so the caller decides whether a tagged metric gets the
    hashed or the human-readable filename, independently of the
    instance-wide ``self.tag_hash_filenames`` setting.
    """
    encoded_name = TaggedSeries.encode(metric, sep, hash_only=tag_hash_filenames)
    return join(self.data_dir, encoded_name + '.wsp')

def validateArchiveList(self, archiveList):
try:
Expand All @@ -174,6 +187,7 @@ def __init__(self, settings):
super(CeresDatabase, self).__init__(settings)

self.data_dir = settings.LOCAL_DATA_DIR
self.tag_hash_filenames = settings.TAG_HASH_FILENAMES
ceres.setDefaultNodeCachingBehavior(settings.CERES_NODE_CACHING_BEHAVIOR)
ceres.setDefaultSliceCachingBehavior(settings.CERES_SLICE_CACHING_BEHAVIOR)
ceres.MAX_SLICE_GAP = int(settings.CERES_MAX_SLICE_GAP)
Expand All @@ -187,26 +201,41 @@ def __init__(self, settings):

self.tree = ceres.CeresTree(self.data_dir)

def encode(self, metric, tag_hash_filenames=None):
    """Encode ``metric`` into the node path used by the Ceres tree.

    When ``tag_hash_filenames`` is ``None`` the instance setting
    ``self.tag_hash_filenames`` is used; an explicit ``True``/``False``
    overrides it for this call only.
    """
    hash_only = (
        self.tag_hash_filenames if tag_hash_filenames is None else tag_hash_filenames
    )
    return TaggedSeries.encode(metric, hash_only=hash_only)

def write(self, metric, datapoints):
self.tree.store(TaggedSeries.encode(metric), datapoints)
self.tree.store(self.encode(metric), datapoints)

def exists(self, metric):
return self.tree.hasNode(TaggedSeries.encode(metric))
if self.tree.hasNode(self.encode(metric)):
return True
# if we're using hashed filenames and a non-hashed file exists then move it to the new name
if self.tag_hash_filenames and self.tree.hasNode(self.encode(metric, False)):
os.rename(self._getFilesystemPath(metric, False), self.getFilesystemPath(metric))
return True
return False

def create(self, metric, retentions, xfilesfactor, aggregation_method):
self.tree.createNode(TaggedSeries.encode(metric), retentions=retentions,
self.tree.createNode(self.encode(metric),
retentions=retentions,
timeStep=retentions[0][0],
xFilesFactor=xfilesfactor,
aggregationMethod=aggregation_method)

def getMetadata(self, metric, key):
return self.tree.getNode(TaggedSeries.encode(metric)).readMetadata()[key]
return self.tree.getNode(self.encode(metric)).readMetadata()[key]

def setMetadata(self, metric, key, value):
node = self.tree.getNode(TaggedSeries.encode(metric))
node = self.tree.getNode(self.encode(metric))
metadata = node.readMetadata()
metadata[key] = value
node.writeMetadata(metadata)

def getFilesystemPath(self, metric):
return self.tree.getFilesystemPath(TaggedSeries.encode(metric))
return self._getFilesystemPath(metric, self.tag_hash_filenames)

def _getFilesystemPath(self, metric, tag_hash_filenames):
    """Return the tree's filesystem path for ``metric``.

    ``tag_hash_filenames`` is forwarded to ``self.encode`` and selects the
    hashed or non-hashed encoding for this lookup.
    """
    node_path = self.encode(metric, tag_hash_filenames)
    return self.tree.getFilesystemPath(node_path)
162 changes: 152 additions & 10 deletions lib/carbon/tests/test_database.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
from unittest import TestCase
from mock import patch
from os.path import exists
import shutil

from carbon.tests.util import TestSettings
from carbon.database import WhisperDatabase, CeresDatabase
Expand All @@ -11,40 +13,180 @@ class WhisperDatabaseTest(TestCase):
def setUp(self):
self._sep_patch = patch.object(os.path, 'sep', "/")
self._sep_patch.start()
settings = TestSettings()
settings['LOCAL_DATA_DIR'] = '/tmp/'
self.database = WhisperDatabase(settings)

def tearDown(self):
self._sep_patch.stop()

def test_getFilesystemPath(self):
result = self.database.getFilesystemPath('stats.example.counts')
settings = TestSettings()
settings['LOCAL_DATA_DIR'] = '/tmp/'
database = WhisperDatabase(settings)
result = database.getFilesystemPath('stats.example.counts')
self.assertEqual(result, '/tmp/stats/example/counts.wsp') # nosec

def test_getTaggedFilesystemPath(self):
result = self.database.getFilesystemPath('stats.example.counts;tag1=value1')
metric = 'stats.example.counts;tag1=value1'

settings = TestSettings()
settings['LOCAL_DATA_DIR'] = '/tmp/'
settings['TAG_HASH_FILENAMES'] = False
database = WhisperDatabase(settings)

result = database.getFilesystemPath(metric)
self.assertEqual(
result, '/tmp/_tagged/872/252/stats_DOT_example_DOT_counts;tag1=value1.wsp') # nosec

result = database.exists(metric)
self.assertEqual(result, False)

def test_getTaggedFilesystemPathHashed(self):
    """With TAG_HASH_FILENAMES on, a tagged metric maps to a hash-named .wsp path."""
    tagged_metric = 'stats.example.counts;tag1=value1'
    expected_path = (
        '/tmp/_tagged/872/252/'  # nosec
        '872252dcead671982862f82a3b440f02aa8f525dd6d0f2921de0dc2b3e874ad0.wsp'
    )

    settings = TestSettings()
    settings['LOCAL_DATA_DIR'] = '/tmp/'
    settings['TAG_HASH_FILENAMES'] = True
    database = WhisperDatabase(settings)

    self.assertEqual(database.getFilesystemPath(tagged_metric), expected_path)

    # the test expects no file for this metric to be present yet
    self.assertEqual(database.exists(tagged_metric), False)

def test_migrateTaggedFilesystemPathHashed(self):
    """Check that exists() moves a non-hashed tagged file to its hashed name.

    The metric is first created with TAG_HASH_FILENAMES disabled, then a
    second database with the setting enabled is asked whether the metric
    exists; that call is expected to rename the file to the hashed path.

    Files are removed through addCleanup so that a failing assertion does
    not leave stale files in /tmp that would break the "does not exist
    yet" preconditions of later runs.
    """
    metric = 'stats.example.counts;tag1=value1'

    def remove_if_present(path):
        # the file may legitimately be gone already (renamed or never created)
        if exists(path):
            os.remove(path)

    settings = TestSettings()
    settings['LOCAL_DATA_DIR'] = '/tmp/'
    settings['TAG_HASH_FILENAMES'] = False
    database = WhisperDatabase(settings)

    result = database.exists(metric)
    self.assertEqual(result, False)

    old_path = database.getFilesystemPath(metric)
    self.assertEqual(
        old_path, '/tmp/_tagged/872/252/stats_DOT_example_DOT_counts;tag1=value1.wsp')  # nosec

    self.assertEqual(exists(old_path), False)

    # register cleanup before the file is created so it runs even on failure
    self.addCleanup(remove_if_present, old_path)
    database.create(metric, [(60, 60)], 0.5, 'average')

    self.assertEqual(exists(old_path), True)

    result = database.exists(metric)
    self.assertEqual(result, True)

    settings['TAG_HASH_FILENAMES'] = True
    database = WhisperDatabase(settings)

    hashed_path = database.getFilesystemPath(metric)
    self.addCleanup(remove_if_present, hashed_path)
    self.assertEqual(
        hashed_path,
        '/tmp/_tagged/872/252/' +  # nosec
        '872252dcead671982862f82a3b440f02aa8f525dd6d0f2921de0dc2b3e874ad0.wsp')

    self.assertEqual(exists(hashed_path), False)

    # exists() is expected to migrate the non-hashed file as a side effect
    result = database.exists(metric)
    self.assertEqual(result, True)

    self.assertEqual(exists(old_path), False)
    self.assertEqual(exists(hashed_path), True)


class CeresDatabaseTest(TestCase):

def setUp(self):
self._sep_patch = patch.object(os.path, 'sep', "/")
self._sep_patch.start()
settings = TestSettings()
settings['LOCAL_DATA_DIR'] = '/tmp/'
self.database = CeresDatabase(settings)

def tearDown(self):
self._sep_patch.stop()

def test_getFilesystemPath(self):
result = self.database.getFilesystemPath('stats.example.counts')
settings = TestSettings()
settings['LOCAL_DATA_DIR'] = '/tmp/'
database = CeresDatabase(settings)
result = database.getFilesystemPath('stats.example.counts')
self.assertEqual(result, '/tmp/stats/example/counts') # nosec

def test_getTaggedFilesystemPath(self):
result = self.database.getFilesystemPath('stats.example.counts;tag1=value1')
metric = 'stats.example.counts;tag1=value1'

settings = TestSettings()
settings['LOCAL_DATA_DIR'] = '/tmp/'
settings['TAG_HASH_FILENAMES'] = False
database = CeresDatabase(settings)

result = database.getFilesystemPath(metric)
self.assertEqual(
result, '/tmp/_tagged/872/252/stats_DOT_example_DOT_counts;tag1=value1') # nosec

result = database.exists(metric)
self.assertEqual(result, False)

def test_getTaggedFilesystemPathHashed(self):
    """With TAG_HASH_FILENAMES on, a tagged metric maps to a hash-named node path."""
    tagged_metric = 'stats.example.counts;tag1=value1'
    expected_path = (
        '/tmp/_tagged/872/252/'  # nosec
        '872252dcead671982862f82a3b440f02aa8f525dd6d0f2921de0dc2b3e874ad0'
    )

    settings = TestSettings()
    settings['LOCAL_DATA_DIR'] = '/tmp/'
    settings['TAG_HASH_FILENAMES'] = True
    database = CeresDatabase(settings)

    self.assertEqual(database.getFilesystemPath(tagged_metric), expected_path)

    # the test expects no node for this metric to be present yet
    self.assertEqual(database.exists(tagged_metric), False)

def test_migrateTaggedFilesystemPathHashed(self):
    """Check that exists() moves a non-hashed tagged node to its hashed name.

    The metric is first created with TAG_HASH_FILENAMES disabled, then a
    second database with the setting enabled is asked whether the metric
    exists; that call is expected to rename the node to the hashed path.

    Nodes are removed through addCleanup so that a failing assertion does
    not leave stale directories in /tmp that would break the "does not
    exist yet" preconditions of later runs.
    """
    metric = 'stats.example.counts;tag1=value1'

    def remove_tree_if_present(path):
        # the node may legitimately be gone already (renamed or never created)
        if exists(path):
            shutil.rmtree(path)

    settings = TestSettings()
    settings['LOCAL_DATA_DIR'] = '/tmp/'
    settings['TAG_HASH_FILENAMES'] = False
    database = CeresDatabase(settings)

    result = database.exists(metric)
    self.assertEqual(result, False)

    old_path = database.getFilesystemPath(metric)
    self.assertEqual(
        old_path, '/tmp/_tagged/872/252/stats_DOT_example_DOT_counts;tag1=value1')  # nosec

    self.assertEqual(exists(old_path), False)

    # register cleanup before the node is created so it runs even on failure
    self.addCleanup(remove_tree_if_present, old_path)
    database.create(metric, [(60, 60)], 0.5, 'average')

    self.assertEqual(exists(old_path), True)

    result = database.exists(metric)
    self.assertEqual(result, True)

    settings['TAG_HASH_FILENAMES'] = True
    database = CeresDatabase(settings)

    hashed_path = database.getFilesystemPath(metric)
    self.addCleanup(remove_tree_if_present, hashed_path)
    self.assertEqual(
        hashed_path,
        '/tmp/_tagged/872/252/' +  # nosec
        '872252dcead671982862f82a3b440f02aa8f525dd6d0f2921de0dc2b3e874ad0')

    self.assertEqual(exists(hashed_path), False)

    # exists() is expected to migrate the non-hashed node as a side effect
    result = database.exists(metric)
    self.assertEqual(result, True)

    self.assertEqual(exists(old_path), False)
    self.assertEqual(exists(hashed_path), True)
13 changes: 11 additions & 2 deletions lib/carbon/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def format(tags):
]))

@staticmethod
def encode(metric, sep='.'):
def encode(metric, sep='.', hash_only=False):
"""
Helper function to encode tagged series for storage in whisper etc
Expand All @@ -404,6 +404,10 @@ def encode(metric, sep='.'):
each carbon database and graphite-web finder is responsible for handling its own encoding so
that different backends can create their own schemes if desired.
The hash_only parameter can be set to True to use the hash as the filename instead of a
human-readable name. This avoids issues with filename length restrictions, at the expense of
being unable to decode the filename and determine the original metric name.
A concrete example:
.. code-block:: none
Expand All @@ -418,7 +422,12 @@ def encode(metric, sep='.'):
"""
if ';' in metric:
metric_hash = sha256(metric.encode('utf8')).hexdigest()
return sep.join(['_tagged', metric_hash[0:3], metric_hash[3:6], metric.replace('.', '_DOT_')])
return sep.join([
'_tagged',
metric_hash[0:3],
metric_hash[3:6],
metric_hash if hash_only else metric.replace('.', '_DOT_')
])

# metric isn't tagged, just replace dots with the separator and trim any leading separator
return metric.replace('.', sep).lstrip(sep)
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@
setup_kwargs = dict()


storage_dirs = [ ('storage/ceres', []), ('storage/whisper',[]),
('storage/lists',[]), ('storage/log',[]),
('storage/rrd',[]) ]
storage_dirs = [ ('storage/ceres/dummy.txt', []), ('storage/whisper/dummy.txt',[]),
('storage/lists',[]), ('storage/log/dummy.txt',[]),
('storage/rrd/dummy.txt',[]) ]
conf_files = [ ('conf', glob('conf/*.example')) ]

install_files = storage_dirs + conf_files
Expand Down

0 comments on commit af2d575

Please sign in to comment.