Merged from trunk

AUTHORS
@@ -15,6 +15,7 @@ Chuck Thier
 Contributors
 ------------
 Joe Arnold
 Chmouel Boudjnah
+Anne Gentle
 Clay Gerrard
 

bin/swift-account-stats-logger
@@ -1,27 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from swift.stats.db_stats_collector import AccountStatsCollector
from swift.common.utils import parse_options
from swift.common.daemon import run_daemon

if __name__ == '__main__':
    conf_file, options = parse_options()
    # currently AccountStatsCollector only supports run_once
    options['once'] = True
    run_daemon(AccountStatsCollector, conf_file,
               section_name='log-processor-stats',
               log_name="account-stats", **options)

bin/swift-container-stats-logger
@@ -1,27 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from swift.stats.db_stats_collector import ContainerStatsCollector
from swift.common.utils import parse_options
from swift.common.daemon import run_daemon

if __name__ == '__main__':
    conf_file, options = parse_options()
    # currently ContainerStatsCollector only supports run_once
    options['once'] = True
    run_daemon(ContainerStatsCollector, conf_file,
               section_name='log-processor-container-stats',
               log_name="container-stats", **options)

bin/swift-log-stats-collector
@@ -1,35 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from optparse import OptionParser

from swift.stats.log_processor import LogProcessorDaemon
from swift.common.utils import parse_options
from swift.common.daemon import run_daemon

if __name__ == '__main__':
    parser = OptionParser(usage='Usage: %prog [options] <conf_file>')

    parser.add_option('--lookback_hours', type='int', dest='lookback_hours',
        help='Hours in the past to start looking for log files')
    parser.add_option('--lookback_window', type='int', dest='lookback_window',
        help='Hours past lookback_hours to stop looking for log files')

    conf_file, options = parse_options(parser)
    # currently the LogProcessorDaemon only supports run_once
    options['once'] = True
    run_daemon(LogProcessorDaemon, conf_file, section_name=None,
               log_name='log-stats-collector', **options)

bin/swift-log-uploader
@@ -1,49 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
from optparse import OptionParser
from swift.stats.log_uploader import LogUploader
from swift.common.utils import parse_options
from swift.common import utils

if __name__ == '__main__':
    parser = OptionParser("Usage: %prog CONFIG_FILE PLUGIN")
    parser.add_option('-c', '--log_cutoff',
                      help='Override new_log_cutoff.')
    parser.add_option('-x', '--regex',
                      help='Override source_filename_pattern regex.')
    conf_file, options = parse_options(parser=parser)
    try:
        plugin = options['extra_args'][0]
    except (IndexError, KeyError):
        print "Error: missing plugin name"
        sys.exit(1)

    uploader_conf = utils.readconf(conf_file, 'log-processor')
    section_name = 'log-processor-%s' % plugin
    plugin_conf = utils.readconf(conf_file, section_name)
    uploader_conf.update(plugin_conf)

    # pre-configure logger
    logger = utils.get_logger(uploader_conf, log_route='log-uploader',
                              log_to_console=options.get('verbose', False))
    # currently LogUploader only supports run_once
    options['once'] = True
    regex = options.get('regex')
    cutoff = options.get('log_cutoff')
    uploader = LogUploader(uploader_conf, plugin,
                           regex=regex, cutoff=cutoff).run(**options)

doc/source/development_saio.rst
@@ -630,7 +630,6 @@ Setting up scripts for running Swift
 #. `remakerings`
 #. `cd ~/swift/trunk; ./.unittests`
 #. `startmain` (The ``Unable to increase file descriptor limit.  Running as non-root?`` warnings are expected and ok.)
-#. `recreateaccounts`
 #. Get an `X-Storage-Url` and `X-Auth-Token`: ``curl -v -H 'X-Storage-User: test:tester' -H 'X-Storage-Pass: testing' http://127.0.0.1:8080/auth/v1.0``
 #. Check that you can GET account: ``curl -v -H 'X-Auth-Token: <token-from-x-auth-token-above>' <url-from-x-storage-url-above>``
 #. Check that `swift` works: `swift -A http://127.0.0.1:8080/auth/v1.0 -U test:tester -K testing stat`

doc/source/index.rst
@@ -42,7 +42,6 @@ Overview and Concepts
     overview_reaper
     overview_auth
     overview_replication
-    overview_stats
     ratelimit
     overview_large_objects
     overview_container_sync

doc/source/overview_stats.rst
@@ -1,192 +0,0 @@
==================
Swift stats system
==================

The swift stats system is composed of three parts: log creation, log
uploading, and log processing. The system handles two types of logs (access
and account stats), but it can be extended to handle other types of logs.

---------
Log Types
---------

***********
Access logs
***********

Access logs are the proxy server logs. Rackspace uses syslog-ng to redirect
the proxy log output to an hourly log file. For example, a proxy request that
is made on August 4, 2010 at 12:37 gets logged in a file named 2010080412.
This allows easy log rotation and easy per-hour log processing.

*********************************
Account / Container DB stats logs
*********************************

DB stats logs are generated by a stats system process.
swift-account-stats-logger runs on each account server (via cron) and walks
the filesystem looking for account databases. When an account database is
found, the logger selects the account hash, bytes_used, container_count, and
object_count. These values are then written out as one line in a csv file. One
csv file is produced for every run of swift-account-stats-logger. This means
that, system wide, one csv file is produced for every storage node. Rackspace
runs the account stats logger every hour. Therefore, in a cluster of ten
account servers, ten csv files are produced every hour. Also, every account
will have one entry for every replica in the system. On average, there will be
three copies of each account in the aggregate of all account stat csv files
created in one system-wide run. The swift-container-stats-logger runs in a
similar fashion, scanning the container dbs.

----------------------
Log Processing plugins
----------------------

The swift stats system is written to allow a plugin to be defined for every
log type. Swift includes plugins for both access logs and storage stats logs.
Each plugin is responsible for defining, in a config section, where the logs
are stored on disk, where the logs will be stored in swift (account and
container), the filename format of the logs on disk, the location of the
plugin class definition, and any plugin-specific config values.

The plugin class definition defines three methods. The constructor must accept
one argument (the dict representation of the plugin's config section). The
process method must accept an iterator, and the account, container, and object
name of the log. The keylist_mapping accepts no parameters.

-------------
Log Uploading
-------------

swift-log-uploader accepts a config file and a plugin name. It finds the log
files on disk according to the plugin config section and uploads them to the
swift cluster. This means one uploader process will run on each proxy server
node and each account server node. To avoid uploading partially-written log
files, the uploader will not upload files with an mtime of less than two
hours ago. Rackspace runs this process once an hour via cron.

--------------
Log Processing
--------------

swift-log-stats-collector accepts a config file and generates a csv that is
uploaded to swift. It loads all plugins defined in the config file, generates
a list of all log files in swift that need to be processed, and passes an
iterable of the log file data to the appropriate plugin's process method. The
process method returns a dictionary of data in the log file keyed on (account,
year, month, day, hour). The log-stats-collector process then combines all
dictionaries from all calls to a process method into one dictionary. Key
collisions within each (account, year, month, day, hour) dictionary are
summed. Finally, the summed dictionary is mapped to the final csv values with
each plugin's keylist_mapping method.

The resulting csv file has one line per (account, year, month, day, hour) for
all log files processed in that run of swift-log-stats-collector.


================================
Running the stats system on SAIO
================================

#. Create a swift account to use for storing stats information, and note the
   account hash. The hash will be used in config files.

#. Edit /etc/rsyslog.d/10-swift.conf::

    # Uncomment the following to have a log containing all logs together
    #local1,local2,local3,local4,local5.*   /var/log/swift/all.log

    $template HourlyProxyLog,"/var/log/swift/hourly/%$YEAR%%$MONTH%%$DAY%%$HOUR%"
    local1.*;local1.!notice ?HourlyProxyLog

    local1.*;local1.!notice /var/log/swift/proxy.log
    local1.notice           /var/log/swift/proxy.error
    local1.*                ~

#. Edit /etc/rsyslog.conf and make the following change::

    $PrivDropToGroup adm

#. `mkdir -p /var/log/swift/hourly`
#. `chown -R syslog.adm /var/log/swift`
#. `chmod 775 /var/log/swift /var/log/swift/hourly`
#. `service rsyslog restart`
#. `usermod -a -G adm <your-user-name>`
#. Relogin to let the group change take effect.
#. Create `/etc/swift/log-processor.conf`::

    [log-processor]
    swift_account = <your-stats-account-hash>
    user = <your-user-name>

    [log-processor-access]
    swift_account = <your-stats-account-hash>
    container_name = log_data
    log_dir = /var/log/swift/hourly/
    source_filename_pattern = ^
        (?P<year>[0-9]{4})
        (?P<month>[0-1][0-9])
        (?P<day>[0-3][0-9])
        (?P<hour>[0-2][0-9])
        .*$
    class_path = swift.stats.access_processor.AccessLogProcessor
    user = <your-user-name>

    [log-processor-stats]
    swift_account = <your-stats-account-hash>
    container_name = account_stats
    log_dir = /var/log/swift/stats/
    class_path = swift.stats.stats_processor.StatsLogProcessor
    devices = /srv/1/node
    mount_check = false
    user = <your-user-name>

    [log-processor-container-stats]
    swift_account = <your-stats-account-hash>
    container_name = container_stats
    log_dir = /var/log/swift/stats/
    class_path = swift.stats.stats_processor.StatsLogProcessor
    processable = false
    devices = /srv/1/node
    mount_check = false
    user = <your-user-name>

#. Add the following under [app:proxy-server] in `/etc/swift/proxy-server.conf`::

    log_facility = LOG_LOCAL1

#. Create a `cron` job to run once per hour to create the stats logs. In
   `/etc/cron.d/swift-stats-log-creator`::

    0 * * * * <your-user-name> /usr/local/bin/swift-account-stats-logger /etc/swift/log-processor.conf

#. Create a `cron` job to run once per hour to create the container stats logs. In
   `/etc/cron.d/swift-container-stats-log-creator`::

    5 * * * * <your-user-name> /usr/local/bin/swift-container-stats-logger /etc/swift/log-processor.conf

#. Create a `cron` job to run once per hour to upload the stats logs. In
   `/etc/cron.d/swift-stats-log-uploader`::

    10 * * * * <your-user-name> /usr/local/bin/swift-log-uploader /etc/swift/log-processor.conf stats

#. Create a `cron` job to run once per hour to upload the container stats logs. In
   `/etc/cron.d/swift-container-stats-log-uploader`::

    15 * * * * <your-user-name> /usr/local/bin/swift-log-uploader /etc/swift/log-processor.conf container-stats

#. Create a `cron` job to run once per hour to upload the access logs. In
   `/etc/cron.d/swift-access-log-uploader`::

    5 * * * * <your-user-name> /usr/local/bin/swift-log-uploader /etc/swift/log-processor.conf access

#. Create a `cron` job to run once per hour to process the logs. In
   `/etc/cron.d/swift-stats-processor`::

    30 * * * * <your-user-name> /usr/local/bin/swift-log-stats-collector /etc/swift/log-processor.conf

After running for a few hours, you should start to see .csv files in the
log_processing_data container in the swift stats account that was created
earlier. Each file will have one entry per account per hour for each account
with activity in that hour. One .csv file should be produced per hour. Note
that the stats will be delayed by at least two hours by default. This can be
changed with the new_log_cutoff variable in the config file. See
`log-processor.conf-sample` for more details.
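
For reference, the plugin contract described in the deleted overview doc is
small. Here is a minimal sketch of such a plugin (illustrative only: the class
name, the 'lines' counter, and the hard-coded timestamp are hypothetical and
not part of this commit)::

    class ExampleLogProcessor(object):
        """Sketch of the plugin interface described in the doc above."""

        def __init__(self, conf):
            # conf is the dict form of the plugin's config section
            self.server_name = conf.get('server_name', 'proxy-server')

        def process(self, obj_stream, account, container, object_name):
            # obj_stream iterates over one log file's lines; return data
            # keyed on (account, year, month, day, hour)
            aggregated = {}
            for line in obj_stream:
                # a real plugin parses the timestamp out of each line;
                # constants keep the sketch short
                key = (account, '2010', '08', '04', '12')
                counts = aggregated.setdefault(key, {})
                counts['lines'] = counts.get('lines', 0) + 1
            return aggregated

        def keylist_mapping(self):
            # maps aggregated keys to final csv column names
            return {'lines': 'total_lines'}

The collector side then merges the per-file dictionaries, summing values on
key collisions, roughly like this (all_process_results is a hypothetical name
for the list of dicts returned by the process calls)::

    totals = {}
    for plugin_output in all_process_results:
        for key, counts in plugin_output.items():
            bucket = totals.setdefault(key, {})
            for field, value in counts.items():
                bucket[field] = bucket.get(field, 0) + value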

etc/account-server.conf-sample
@@ -44,17 +44,6 @@ use = egg:swift#account
 # The replicator also performs reclamation
 # reclaim_age = 86400
 
-[account-stats]
-# You can override the default log routing for this app here (don't use set!):
-# log_name = account-stats
-# log_facility = LOG_LOCAL0
-# log_level = INFO
-# cf_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
-# container_name = account_stats
-# proxy_server_conf = /etc/swift/proxy-server.conf
-# log_facility = LOG_LOCAL0
-# log_level = INFO
-
 [account-auditor]
 # You can override the default log routing for this app here (don't use set!):
 # log_name = account-auditor

etc/log-processor.conf-sample
@@ -1,57 +0,0 @@
# plugin section format is named "log-processor-<plugin>"

[log-processor]
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
# container_name = log_processing_data
# proxy_server_conf = /etc/swift/proxy-server.conf
# log_facility = LOG_LOCAL0
# log_level = INFO
# lookback_hours = 120
# lookback_window = 120
# user = swift

[log-processor-access]
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = log_data
source_filename_pattern = ^
    (?P<year>[0-9]{4})
    (?P<month>[0-1][0-9])
    (?P<day>[0-3][0-9])
    (?P<hour>[0-2][0-9])
    .*$
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.access_processor.AccessLogProcessor
# service ips is for client ip addresses that should be counted as servicenet
# service_ips =
# load balancer private ips is for load balancer ip addresses that should be
# counted as servicenet
# lb_private_ips =
# server_name = proxy-server
# user = swift
# warn_percent = 0.8

[log-processor-stats]
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = account_stats
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.stats_processor.StatsLogProcessor
# devices = /srv/node
# mount_check = true
# user = swift

[log-processor-container-stats]
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = container_stats
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.stats_processor.StatsLogProcessor
processable = false
# devices = /srv/node
# mount_check = true
# user = swift
# metadata_keys = comma separated list of user metadata keys to be collected
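
The access plugin's source_filename_pattern matches the hourly proxy log names
described above (e.g. 2010080412). A quick check with the equivalent
single-line regex (how the uploader compiles the multi-line config value is
not shown in this diff, so this is only an illustration)::

    import re

    # collapsed form of the multi-line pattern in the sample config
    pattern = re.compile('^(?P<year>[0-9]{4})(?P<month>[0-1][0-9])'
                         '(?P<day>[0-3][0-9])(?P<hour>[0-2][0-9]).*$')
    print pattern.match('2010080412').groupdict()
    # {'year': '2010', 'month': '08', 'day': '04', 'hour': '12'}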

setup.py
@@ -92,10 +92,6 @@ setup(
         'bin/swift-stats-report',
         'bin/swift-dispersion-populate', 'bin/swift-dispersion-report',
         'bin/swift-bench',
-        'bin/swift-log-uploader',
-        'bin/swift-log-stats-collector',
-        'bin/swift-account-stats-logger',
-        'bin/swift-container-stats-logger',
         ],
     entry_points={
         'paste.app_factory': [

swift/account/server.py
@@ -29,7 +29,7 @@ import simplejson
 
 from swift.common.db import AccountBroker
 from swift.common.utils import get_logger, get_param, hash_path, \
-    normalize_timestamp, split_path, storage_directory
+    normalize_timestamp, split_path, storage_directory, XML_EXTRA_ENTITIES
 from swift.common.constraints import ACCOUNT_LISTING_LIMIT, \
     check_mount, check_float, check_utf8
 from swift.common.db_replicator import ReplicatorRpc

@@ -79,6 +79,9 @@ class AccountController(object):
         try:
             drive, part, account, container = split_path(unquote(req.path),
                                                          3, 4)
+            if (account and not check_utf8(account)) or \
+                    (container and not check_utf8(container)):
+                raise ValueError('NULL characters not allowed in names')
         except ValueError, err:
             return HTTPBadRequest(body=str(err), content_type='text/plain',
                                   request=req)

@@ -201,8 +204,8 @@ class AccountController(object):
             marker = get_param(req, 'marker', '')
             end_marker = get_param(req, 'end_marker')
             query_format = get_param(req, 'format')
-        except UnicodeDecodeError, err:
-            return HTTPBadRequest(body='parameters not utf8',
+        except (UnicodeDecodeError, ValueError), err:
+            return HTTPBadRequest(body='parameters not utf8 or contain NULLs',
                                   content_type='text/plain', request=req)
         if query_format:
             req.accept = 'application/%s' % query_format.lower()

@@ -228,7 +231,7 @@ class AccountController(object):
             output_list = ['<?xml version="1.0" encoding="UTF-8"?>',
                            '<account name="%s">' % account]
             for (name, object_count, bytes_used, is_subdir) in account_list:
-                name = saxutils.escape(name)
+                name = saxutils.escape(name, XML_EXTRA_ENTITIES)
                 if is_subdir:
                     output_list.append('<subdir name="%s" />' % name)
                 else:

swift/common/compressing_file_reader.py
@@ -1,73 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import zlib
import struct


class CompressingFileReader(object):
    '''
    Wraps a file object and provides a read method that returns gzip'd data.

    One warning: if read is called with a small value, the data returned may
    be bigger than the value. In this case, the "compressed" data will be
    bigger than the original data. To solve this, use a bigger read buffer.

    An example use case:
    Given an uncompressed file on disk, provide a way to read compressed data
    without buffering the entire file data in memory. Using this class, an
    uncompressed log file could be uploaded as compressed data with chunked
    transfer encoding.

    gzip header and footer code taken from the python stdlib gzip module

    :param file_obj: File object to read from
    :param compresslevel: compression level
    '''

    def __init__(self, file_obj, compresslevel=9):
        self._f = file_obj
        self._compressor = zlib.compressobj(compresslevel,
                                            zlib.DEFLATED,
                                            -zlib.MAX_WBITS,
                                            zlib.DEF_MEM_LEVEL,
                                            0)
        self.done = False
        self.first = True
        self.crc32 = 0
        self.total_size = 0

    def read(self, *a, **kw):
        if self.done:
            return ''
        x = self._f.read(*a, **kw)
        if x:
            self.crc32 = zlib.crc32(x, self.crc32) & 0xffffffffL
            self.total_size += len(x)
            compressed = self._compressor.compress(x)
            if not compressed:
                compressed = self._compressor.flush(zlib.Z_SYNC_FLUSH)
        else:
            compressed = self._compressor.flush(zlib.Z_FINISH)
            crc32 = struct.pack("<L", self.crc32 & 0xffffffffL)
            size = struct.pack("<L", self.total_size & 0xffffffffL)
            footer = crc32 + size
            compressed += footer
            self.done = True
        if self.first:
            self.first = False
            header = '\037\213\010\000\000\000\000\000\002\377'
            compressed = header + compressed
        return compressed
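
A usage sketch for the removed reader (Python 2; send_chunk is a hypothetical
stand-in for writing one chunk to a chunked-transfer request)::

    reader = CompressingFileReader(
        open('/var/log/swift/hourly/2010080412', 'rb'))
    chunk = reader.read(65536)
    while chunk:
        send_chunk(chunk)           # hypothetical: put the chunk on the wire
        chunk = reader.read(65536)
    # read() returns '' once the gzip footer has been emitted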

swift/common/constraints.py
@@ -159,13 +159,16 @@ def check_float(string):
 
 def check_utf8(string):
     """
-    Validate if a string is valid UTF-8.
+    Validate if a string is valid UTF-8 and has no NULL characters.
 
     :param string: string to be validated
-    :returns: True if the string is valid utf-8, False otherwise
+    :returns: True if the string is valid utf-8 and has no NULL characters,
+              False otherwise
     """
     try:
         string.decode('UTF-8')
-        return True
     except UnicodeDecodeError:
         return False
+    if '\x00' in string:
+        return False
+    return True
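
The new behavior of check_utf8, illustratively (Python 2 str inputs; these
asserts are not part of the commit)::

    assert check_utf8('hello')           # plain ASCII is fine
    assert not check_utf8('\xff\xfe')    # not decodable as UTF-8
    assert not check_utf8('a\x00b')      # NUL bytes are now rejected too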

swift/common/internal_proxy.py
@@ -1,210 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import webob
from urllib import quote, unquote
from json import loads as json_loads

from swift.common.compressing_file_reader import CompressingFileReader
from swift.proxy.server import BaseApplication


class MemcacheStub(object):

    def get(self, *a, **kw):  # pragma: no cover
        return None

    def set(self, *a, **kw):  # pragma: no cover
        return None

    def incr(self, *a, **kw):  # pragma: no cover
        return 0

    def delete(self, *a, **kw):  # pragma: no cover
        return None

    def set_multi(self, *a, **kw):  # pragma: no cover
        return None

    def get_multi(self, *a, **kw):  # pragma: no cover
        return []


def make_request_body_file(source_file, compress=True):
    if hasattr(source_file, 'seek'):
        source_file.seek(0)
    else:
        source_file = open(source_file, 'rb')
    if compress:
        compressed_file = CompressingFileReader(source_file)
        return compressed_file
    return source_file


def webob_request_copy(orig_req, source_file=None, compress=True):
    req_copy = orig_req.copy()
    if source_file:
        req_copy.body_file = make_request_body_file(source_file,
                                                    compress=compress)
    req_copy.content_length = orig_req.content_length
    return req_copy


class InternalProxy(object):
    """
    Set up a private instance of a proxy server that allows normal requests
    to be made without having to actually send the request to the proxy.
    This also doesn't log the requests to the normal proxy logs.

    :param proxy_server_conf: proxy server configuration dictionary
    :param logger: logger to log requests to
    :param retries: number of times to retry each request
    """

    def __init__(self, proxy_server_conf=None, logger=None, retries=0):
        self.upload_app = BaseApplication(proxy_server_conf,
                                          memcache=MemcacheStub(),
                                          logger=logger)
        self.retries = retries

    def _handle_request(self, req, source_file=None, compress=True):
        req = self.upload_app.update_request(req)
        req_copy = webob_request_copy(req, source_file=source_file,
                                      compress=compress)
        resp = self.upload_app.handle_request(req_copy)
        tries = 1
        while (resp.status_int < 200 or resp.status_int > 299) \
                and tries < self.retries:
            req_copy = webob_request_copy(req, source_file=source_file,
                                          compress=compress)
            resp = self.upload_app.handle_request(req_copy)
            tries += 1
        return resp

    def upload_file(self, source_file, account, container, object_name,
                    compress=True, content_type='application/x-gzip',
                    etag=None):
        """
        Upload a file to cloud files.

        :param source_file: path to or file like object to upload
        :param account: account to upload to
        :param container: container to upload to
        :param object_name: name of object being uploaded
        :param compress: if True, compresses object as it is uploaded
        :param content_type: content-type of object
        :param etag: etag for object to check successful upload
        :returns: True if successful, False otherwise
        """
        target_name = '/v1/%s/%s/%s' % (account, container, object_name)

        # create the container
        if not self.create_container(account, container):
            return False

        # upload the file to the account
        req = webob.Request.blank(target_name, content_type=content_type,
                            environ={'REQUEST_METHOD': 'PUT'},
                            headers={'Transfer-Encoding': 'chunked'})
        req.content_length = None   # to make sure we send chunked data
        if etag:
            req.headers['etag'] = etag
        resp = self._handle_request(req, source_file=source_file,
                                    compress=compress)
        if not (200 <= resp.status_int < 300):
            return False
        return True

    def get_object(self, account, container, object_name):
        """
        Get object.

        :param account: account name object is in
        :param container: container name object is in
        :param object_name: name of object to get
        :returns: iterator for object data
        """
        req = webob.Request.blank('/v1/%s/%s/%s' %
                            (account, container, object_name),
                            environ={'REQUEST_METHOD': 'GET'})
        resp = self._handle_request(req)
        return resp.status_int, resp.app_iter

    def create_container(self, account, container):
        """
        Create container.

        :param account: account name to put the container in
        :param container: container name to create
        :returns: True if successful, otherwise False
        """
        req = webob.Request.blank('/v1/%s/%s' % (account, container),
                            environ={'REQUEST_METHOD': 'PUT'})
        resp = self._handle_request(req)
        return 200 <= resp.status_int < 300

    def get_container_list(self, account, container, marker=None,
                           end_marker=None, limit=None, prefix=None,
                           delimiter=None, full_listing=True):
        """
        Get a listing of objects for the container.

        :param account: account name for the container
        :param container: container name to get a listing for
        :param marker: marker query
        :param end_marker: end marker query
        :param limit: limit query
        :param prefix: prefix query
        :param delimiter: string to delimit the queries on
        :param full_listing: if True, return a full listing, else returns a max
                             of 10000 listings
        :returns: list of objects
        """
        if full_listing:
            rv = []
            listing = self.get_container_list(account, container, marker,
                                              end_marker, limit, prefix,
                                              delimiter, full_listing=False)
            while listing:
                rv.extend(listing)
                if not delimiter:
                    marker = listing[-1]['name']
                else:
                    marker = listing[-1].get('name', listing[-1].get('subdir'))
                listing = self.get_container_list(account, container, marker,
                                                  end_marker, limit, prefix,
                                                  delimiter,
                                                  full_listing=False)
            return rv
        path = '/v1/%s/%s' % (account, quote(container))
        qs = 'format=json'
        if marker:
            qs += '&marker=%s' % quote(marker)
        if end_marker:
            qs += '&end_marker=%s' % quote(end_marker)
        if limit:
            qs += '&limit=%d' % limit
        if prefix:
            qs += '&prefix=%s' % quote(prefix)
        if delimiter:
            qs += '&delimiter=%s' % quote(delimiter)
        path += '?%s' % qs
        req = webob.Request.blank(path, environ={'REQUEST_METHOD': 'GET'})
        resp = self._handle_request(req)
        if resp.status_int < 200 or resp.status_int >= 300:
            return []  # TODO: distinguish between 404 and empty container
        if resp.status_int == 204:
            return []
        return json_loads(resp.body)
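
The stats daemons drove this class roughly as follows (a sketch: conf stands
for the parsed proxy-server.conf dict, and the account and object names are
made-up placeholders)::

    proxy = InternalProxy(proxy_server_conf=conf, retries=3)
    ok = proxy.upload_file('/var/log/swift/hourly/2010080412',
                           'AUTH_<stats-account-hash>', 'log_data',
                           '2010/08/04/12/access.log.gz')
    if ok:
        listing = proxy.get_container_list('AUTH_<stats-account-hash>',
                                           'log_data', prefix='2010/08/04/')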

swift/common/middleware/tempauth.py
@@ -89,7 +89,7 @@ class TempAuth(object):
                     if ip == '0.0.0.0':
                         ip = '127.0.0.1'
                     url += ip
-                    url += ':' + conf.get('bind_port', 80) + '/v1/' + \
+                    url += ':' + conf.get('bind_port', '8080') + '/v1/' + \
                            self.reseller_prefix + conf_key.split('_')[1]
                 groups = values
                 self.users[conf_key.split('_', 1)[1].replace('_', ':')] = {
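
The change above is a type fix as much as a default change: with no bind_port
configured, conf.get() returned the int 80, and concatenating that to a string
raises in Python 2. Illustratively::

    conf = {}
    url = 'https://127.0.0.1'
    # url += ':' + conf.get('bind_port', 80)    # TypeError: int, not str
    url += ':' + conf.get('bind_port', '8080')  # works: the default is a str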

swift/common/utils.py
@@ -42,6 +42,7 @@ from eventlet import greenio, GreenPool, sleep, Timeout, listen
 from eventlet.green import socket, subprocess, ssl, thread, threading
 import netifaces
 
+from swift.common.constraints import check_utf8
 from swift.common.exceptions import LockTimeout, MessageTimeout
 
 # logging doesn't import patched as cleanly as one would like

@@ -74,6 +75,8 @@ if hash_conf.read('/etc/swift/swift.conf'):
 # Used when reading config values
 TRUE_VALUES = set(('true', '1', 'yes', 'on', 't', 'y'))
 
+# Used with xml.sax.saxutils.escape
+XML_EXTRA_ENTITIES = dict((chr(x), '&#x%x;' % x) for x in xrange(1, 0x20))
 
 def validate_configuration():
     if HASH_PATH_SUFFIX == '':
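
What the new escape table does, illustratively (Python 2): control characters
below 0x20 are turned into numeric character references instead of being
emitted raw inside the XML listings::

    from xml.sax import saxutils

    XML_EXTRA_ENTITIES = dict((chr(x), '&#x%x;' % x) for x in xrange(1, 0x20))
    print saxutils.escape('bad\x07name', XML_EXTRA_ENTITIES)
    # -> bad&#x7;name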

@@ -110,8 +113,8 @@ def get_param(req, name, default=None):
     :returns: HTTP request parameter value
     """
     value = req.str_params.get(name, default)
-    if value:
-        value.decode('utf8')    # Ensure UTF8ness
+    if value and not check_utf8(value):
+        raise ValueError('Not valid UTF-8 or contains NULL characters')
     return value

@@ -144,12 +147,12 @@ def drop_buffer_cache(fd, offset, length):
     """
     global _posix_fadvise
     if _posix_fadvise is None:
-        _posix_fadvise = load_libc_function('posix_fadvise')
+        _posix_fadvise = load_libc_function('posix_fadvise64')
     # 4 means "POSIX_FADV_DONTNEED"
     ret = _posix_fadvise(fd, ctypes.c_uint64(offset),
                         ctypes.c_uint64(length), 4)
     if ret != 0:
-        logging.warn("posix_fadvise(%s, %s, %s, 4) -> %s"
+        logging.warn("posix_fadvise64(%s, %s, %s, 4) -> %s"
                      % (fd, offset, length, ret))

swift/container/server.py
@@ -33,7 +33,7 @@ from webob.exc import HTTPAccepted, HTTPBadRequest, HTTPConflict, \
 from swift.common.db import ContainerBroker
 from swift.common.utils import get_logger, get_param, hash_path, \
     normalize_timestamp, storage_directory, split_path, urlparse, \
-    validate_sync_to
+    validate_sync_to, XML_EXTRA_ENTITIES
 from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
     check_mount, check_float, check_utf8
 from swift.common.bufferedhttp import http_connect

@@ -172,6 +172,10 @@ class ContainerController(object):
         try:
             drive, part, account, container, obj = split_path(
                 unquote(req.path), 4, 5, True)
+            if (account and not check_utf8(account)) or \
+                    (container and not check_utf8(container)) or \
+                    (obj and not check_utf8(obj)):
+                raise ValueError('NULL characters not allowed in names')
         except ValueError, err:
             return HTTPBadRequest(body=str(err), content_type='text/plain',
                                 request=req)

@@ -289,7 +293,7 @@ class ContainerController(object):
                     return HTTPPreconditionFailed(request=req,
                         body='Maximum limit is %d' % CONTAINER_LISTING_LIMIT)
             query_format = get_param(req, 'format')
-        except UnicodeDecodeError, err:
+        except (UnicodeDecodeError, ValueError), err:
             return HTTPBadRequest(body='parameters not utf8',
                                   content_type='text/plain', request=req)
         if query_format:

@@ -324,21 +328,23 @@ class ContainerController(object):
             xml_output = []
             for (name, created_at, size, content_type, etag) in container_list:
                 # escape name and format date here
-                name = saxutils.escape(name)
+                name = saxutils.escape(name, XML_EXTRA_ENTITIES)
                 created_at = datetime.utcfromtimestamp(
                     float(created_at)).isoformat()
                 if content_type is None:
                     xml_output.append('<subdir name="%s"><name>%s</name>'
                                       '</subdir>' % (name, name))
                 else:
-                    content_type = saxutils.escape(content_type)
+                    content_type = saxutils.escape(content_type,
+                                                   XML_EXTRA_ENTITIES)
                     xml_output.append('<object><name>%s</name><hash>%s</hash>'\
                            '<bytes>%d</bytes><content_type>%s</content_type>'\
                            '<last_modified>%s</last_modified></object>' % \
                            (name, etag, size, content_type, created_at))
             container_list = ''.join([
                 '<?xml version="1.0" encoding="UTF-8"?>\n',
-                '<container name=%s>' % saxutils.quoteattr(container),
+                '<container name=%s>' %
+                    saxutils.quoteattr(container, XML_EXTRA_ENTITIES),
                 ''.join(xml_output), '</container>'])
         else:
             if not container_list:
@@ -1,250 +0,0 @@
 | 
			
		||||
# Copyright (c) 2010-2011 OpenStack, LLC.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 | 
			
		||||
# implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
 | 
			
		||||
import collections
 | 
			
		||||
from urllib import unquote
 | 
			
		||||
import copy
 | 
			
		||||
 | 
			
		||||
from swift.common.utils import split_path, get_logger
 | 
			
		||||
 | 
			
		||||
month_map = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
 | 
			
		||||
LISTING_PARAMS = set(
 | 
			
		||||
                'path limit format delimiter marker end_marker prefix'.split())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AccessLogProcessor(object):
 | 
			
		||||
    """Transform proxy server access logs"""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, conf):
 | 
			
		||||
        self.server_name = conf.get('server_name', 'proxy-server')
 | 
			
		||||
        self.lb_private_ips = [x.strip() for x in \
 | 
			
		||||
                               conf.get('lb_private_ips', '').split(',')\
 | 
			
		||||
                               if x.strip()]
 | 
			
		||||
        self.service_ips = [x.strip() for x in \
 | 
			
		||||
                            conf.get('service_ips', '').split(',')\
 | 
			
		||||
                            if x.strip()]
 | 
			
		||||
        self.warn_percent = float(conf.get('warn_percent', '0.8'))
 | 
			
		||||
        self.logger = get_logger(conf, log_route='access-processor')
 | 
			
		||||
 | 
			
		||||
    def log_line_parser(self, raw_log):
 | 
			
		||||
        '''given a raw access log line, return a dict of the good parts'''
 | 
			
		||||
        d = {}
 | 
			
		||||
        try:
 | 
			
		||||
            (unused,
 | 
			
		||||
            server,
 | 
			
		||||
            client_ip,
 | 
			
		||||
            lb_ip,
 | 
			
		||||
            timestamp,
 | 
			
		||||
            method,
 | 
			
		||||
            request,
 | 
			
		||||
            http_version,
 | 
			
		||||
            code,
 | 
			
		||||
            referrer,
 | 
			
		||||
            user_agent,
 | 
			
		||||
            auth_token,
 | 
			
		||||
            bytes_in,
 | 
			
		||||
            bytes_out,
 | 
			
		||||
            etag,
 | 
			
		||||
            trans_id,
 | 
			
		||||
            headers,
 | 
			
		||||
            processing_time) = (unquote(x) for x in
 | 
			
		||||
                                raw_log[16:].split(' ')[:18])
        except ValueError:
            self.logger.debug(_('Bad line data: %s') % repr(raw_log))
            return {}
        if server != self.server_name:
            # incorrect server name in log line
            self.logger.debug(_('Bad server name: found "%(found)s" '
                    'expected "%(expected)s"') %
                    {'found': server, 'expected': self.server_name})
            return {}
        try:
            (version, account, container_name, object_name) = \
                split_path(request, 2, 4, True)
        except ValueError, e:
            self.logger.debug(_('Invalid path: %(error)s from data: %(log)s') %
                    {'error': e, 'log': repr(raw_log)})
            return {}
        if container_name is not None:
            container_name = container_name.split('?', 1)[0]
        if object_name is not None:
            object_name = object_name.split('?', 1)[0]
        account = account.split('?', 1)[0]
        query = None
        if '?' in request:
            request, query = request.split('?', 1)
            args = query.split('&')
            # Count each query argument. This is used later to aggregate
            # the number of format, prefix, etc. queries.
            for q in args:
                if '=' in q:
                    k, v = q.split('=', 1)
                else:
                    k = q
                # Certain keys will get summed in stats reporting
                # (format, path, delimiter, etc.). Save a "1" here
                # to indicate that this request is 1 request for
                # its respective key.
                if k in LISTING_PARAMS:
                    d[k] = 1
        d['client_ip'] = client_ip
        d['lb_ip'] = lb_ip
        d['method'] = method
        d['request'] = request
        if query:
            d['query'] = query
        d['http_version'] = http_version
        d['code'] = code
        d['referrer'] = referrer
        d['user_agent'] = user_agent
        d['auth_token'] = auth_token
        d['bytes_in'] = bytes_in
        d['bytes_out'] = bytes_out
        d['etag'] = etag
        d['trans_id'] = trans_id
        d['processing_time'] = processing_time
        day, month, year, hour, minute, second = timestamp.split('/')
        d['day'] = day
        month = '%02d' % month_map.index(month)
        d['month'] = month
        d['year'] = year
        d['hour'] = hour
        d['minute'] = minute
        d['second'] = second
        d['tz'] = '+0000'
        d['account'] = account
        d['container_name'] = container_name
        d['object_name'] = object_name
        d['bytes_out'] = int(d['bytes_out'].replace('-', '0'))
        d['bytes_in'] = int(d['bytes_in'].replace('-', '0'))
        d['code'] = int(d['code'])
        return d

    def process(self, obj_stream, data_object_account, data_object_container,
                data_object_name):
        '''generate hourly groupings of data from one access log file'''
        hourly_aggr_info = {}
        total_lines = 0
        bad_lines = 0
        for line in obj_stream:
            line_data = self.log_line_parser(line)
            total_lines += 1
            if not line_data:
                bad_lines += 1
                continue
            account = line_data['account']
            container_name = line_data['container_name']
            year = line_data['year']
            month = line_data['month']
            day = line_data['day']
            hour = line_data['hour']
            bytes_out = line_data['bytes_out']
            bytes_in = line_data['bytes_in']
            method = line_data['method']
            code = int(line_data['code'])
            object_name = line_data['object_name']
            client_ip = line_data['client_ip']

            if not container_name:
                op_level = 'account'
            elif not object_name:
                op_level = 'container'
            else:
                op_level = 'object'

            aggr_key = (account, year, month, day, hour)
            d = hourly_aggr_info.get(aggr_key, {})
            if line_data['lb_ip'] in self.lb_private_ips:
                source = 'service'
            else:
                source = 'public'

            if line_data['client_ip'] in self.service_ips:
                source = 'service'

            d[(source, 'bytes_out')] = d.setdefault(
                (source, 'bytes_out'), 0) + bytes_out
            d[(source, 'bytes_in')] = d.setdefault(
                (source, 'bytes_in'), 0) + bytes_in

            d['format_query'] = d.setdefault('format_query', 0) + \
                                line_data.get('format', 0)
            d['marker_query'] = d.setdefault('marker_query', 0) + \
                                line_data.get('marker', 0)
            d['prefix_query'] = d.setdefault('prefix_query', 0) + \
                                line_data.get('prefix', 0)
            d['delimiter_query'] = d.setdefault('delimiter_query', 0) + \
                                   line_data.get('delimiter', 0)
            path = line_data.get('path', 0)
            d['path_query'] = d.setdefault('path_query', 0) + path

            code = '%dxx' % (code / 100)
            key = (source, op_level, method, code)
            d[key] = d.setdefault(key, 0) + 1

            hourly_aggr_info[aggr_key] = d
        if bad_lines > (total_lines * self.warn_percent):
            name = '/'.join([data_object_account, data_object_container,
                             data_object_name])
            self.logger.warning(_('Found too many bad lines in %(name)s '
                    '(%(bad)d bad, %(total)d total)') %
                    {'name': name, 'bad': bad_lines, 'total': total_lines})
        return hourly_aggr_info

    def keylist_mapping(self):
        source_keys = 'service public'.split()
        level_keys = 'account container object'.split()
        verb_keys = 'GET PUT POST DELETE HEAD COPY'.split()
        code_keys = '2xx 4xx 5xx'.split()

        keylist_mapping = {
        #   <db key> : <row key> or <set of row keys>
            'service_bw_in': ('service', 'bytes_in'),
            'service_bw_out': ('service', 'bytes_out'),
            'public_bw_in': ('public', 'bytes_in'),
            'public_bw_out': ('public', 'bytes_out'),
            'account_requests': set(),
            'container_requests': set(),
            'object_requests': set(),
            'service_request': set(),
            'public_request': set(),
            'ops_count': set(),
        }
        for verb in verb_keys:
            keylist_mapping[verb] = set()
        for code in code_keys:
            keylist_mapping[code] = set()
        for source in source_keys:
            for level in level_keys:
                for verb in verb_keys:
                    for code in code_keys:
                        keylist_mapping['account_requests'].add(
                                        (source, 'account', verb, code))
                        keylist_mapping['container_requests'].add(
                                        (source, 'container', verb, code))
                        keylist_mapping['object_requests'].add(
                                        (source, 'object', verb, code))
                        keylist_mapping['service_request'].add(
                                        ('service', level, verb, code))
                        keylist_mapping['public_request'].add(
                                        ('public', level, verb, code))
                        keylist_mapping[verb].add(
                                        (source, level, verb, code))
                        keylist_mapping[code].add(
                                        (source, level, verb, code))
                        keylist_mapping['ops_count'].add(
                                        (source, level, verb, code))
        return keylist_mapping
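For orientation, a small worked example (the counts are made up, not from this
commit): each db key in the mapping above names a csv column, and the column's
value is the sum over every row key in that db key's set.

    # two hourly row keys that both belong to keylist_mapping['GET']
    hourly = {('public', 'object', 'GET', '2xx'): 7,
              ('service', 'object', 'GET', '2xx'): 3}
    get_keys = [('public', 'object', 'GET', '2xx'),
                ('service', 'object', 'GET', '2xx')]
    print sum(hourly.get(k, 0) for k in get_keys)  # -> 10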
@@ -1,177 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
from paste.deploy import appconfig
import shutil
import hashlib
import urllib

from swift.account.server import DATADIR as account_server_data_dir
from swift.container.server import DATADIR as container_server_data_dir
from swift.common.db import AccountBroker, ContainerBroker
from swift.common.utils import renamer, get_logger, readconf, mkdirs, \
    TRUE_VALUES, remove_file
from swift.common.constraints import check_mount
from swift.common.daemon import Daemon


class DatabaseStatsCollector(Daemon):
    """
    Extract storage stats from account or container databases on the
    storage nodes

    Any subclasses must define the function get_data.
    """

    def __init__(self, stats_conf, stats_type, data_dir, filename_format):
        super(DatabaseStatsCollector, self).__init__(stats_conf)
        self.stats_type = stats_type
        self.data_dir = data_dir
        self.filename_format = filename_format
        self.devices = stats_conf.get('devices', '/srv/node')
        self.mount_check = stats_conf.get('mount_check',
                                          'true').lower() in TRUE_VALUES
        self.target_dir = stats_conf.get('log_dir', '/var/log/swift')
        mkdirs(self.target_dir)
        self.logger = get_logger(stats_conf,
                                 log_route='%s-stats' % stats_type)

    def run_once(self, *args, **kwargs):
        self.logger.info(_("Gathering %s stats") % self.stats_type)
        start = time.time()
        self.find_and_process()
        self.logger.info(_("Gathering %s stats complete (%0.2f minutes)") %
                         (self.stats_type, (time.time() - start) / 60))

    def get_data(self):
        raise NotImplementedError('Subclasses must override')

    def get_header(self):
        raise NotImplementedError('Subclasses must override')

    def find_and_process(self):
        src_filename = time.strftime(self.filename_format)
        working_dir = os.path.join(self.target_dir,
                                   '.%s-stats_tmp' % self.stats_type)
        shutil.rmtree(working_dir, ignore_errors=True)
        mkdirs(working_dir)
        tmp_filename = os.path.join(working_dir, src_filename)
        hasher = hashlib.md5()
        try:
            with open(tmp_filename, 'wb') as statfile:
                statfile.write(self.get_header())
                for device in os.listdir(self.devices):
                    if self.mount_check and not check_mount(self.devices,
                                                            device):
                        self.logger.error(
                            _("Device %s is not mounted, skipping.") % device)
                        continue
                    db_dir = os.path.join(self.devices, device, self.data_dir)
                    if not os.path.exists(db_dir):
                        self.logger.debug(
                            _("Path %s does not exist, skipping.") % db_dir)
                        continue
                    for root, dirs, files in os.walk(db_dir, topdown=False):
                        for filename in files:
                            if filename.endswith('.db'):
                                db_path = os.path.join(root, filename)
                                line_data = self.get_data(db_path)
                                if line_data:
                                    statfile.write(line_data)
                                    hasher.update(line_data)

            src_filename += hasher.hexdigest()
            renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
        finally:
            shutil.rmtree(working_dir, ignore_errors=True)


class AccountStatsCollector(DatabaseStatsCollector):
    """
    Extract storage stats from account databases on the account
    storage nodes
    """

    def __init__(self, stats_conf):
        super(AccountStatsCollector, self).__init__(stats_conf, 'account',
                                                    account_server_data_dir,
                                                    'stats-%Y%m%d%H_')

    def get_data(self, db_path):
        """
        Data for generated csv has the following columns:
        Account Hash, Container Count, Object Count, Bytes Used
        """
        line_data = None
        broker = AccountBroker(db_path)
        if not broker.is_deleted():
            info = broker.get_info()
            line_data = '"%s",%d,%d,%d\n' % (info['account'],
                                             info['container_count'],
                                             info['object_count'],
                                             info['bytes_used'])
        return line_data

    def get_header(self):
        return ''


class ContainerStatsCollector(DatabaseStatsCollector):
    """
    Extract storage stats from container databases on the container
    storage nodes
    """

    def __init__(self, stats_conf):
        super(ContainerStatsCollector, self).__init__(stats_conf, 'container',
                                                   container_server_data_dir,
                                                   'container-stats-%Y%m%d%H_')
        # webob calls title on all the header keys
        self.metadata_keys = ['X-Container-Meta-%s' % mkey.strip().title()
                for mkey in stats_conf.get('metadata_keys', '').split(',')
                if mkey.strip()]

    def get_header(self):
        header = 'Account Hash,Container Name,Object Count,Bytes Used'
        if self.metadata_keys:
            xtra_headers = ','.join(self.metadata_keys)
            header += ',%s' % xtra_headers
        header += '\n'
        return header

    def get_data(self, db_path):
        """
        Data for generated csv has the following columns:
        Account Hash, Container Name, Object Count, Bytes Used
        For each configured metadata key, it records only whether the
        metadata is set, using a 1 or ''.
        """
        line_data = None
        broker = ContainerBroker(db_path)
        if not broker.is_deleted():
            info = broker.get_info(include_metadata=bool(self.metadata_keys))
            encoded_container_name = urllib.quote(info['container'])
            line_data = '"%s","%s",%d,%d' % (
                info['account'], encoded_container_name,
                info['object_count'], info['bytes_used'])
            if self.metadata_keys:
                metadata_results = ','.join(
                    [info['metadata'].get(mkey) and '1' or ''
                     for mkey in self.metadata_keys])
                line_data += ',%s' % metadata_results
            line_data += '\n'
        return line_data
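For illustration, a minimal collector sketch (the class name, stats type, and
csv layout here are hypothetical, not part of Swift) showing the two methods a
subclass must supply:

    class ExampleStatsCollector(DatabaseStatsCollector):

        def __init__(self, stats_conf):
            super(ExampleStatsCollector, self).__init__(
                stats_conf, 'example', account_server_data_dir,
                'example-stats-%Y%m%d%H_')

        def get_header(self):
            return 'Account Hash,Bytes Used\n'

        def get_data(self, db_path):
            # one csv line per undeleted account database
            broker = AccountBroker(db_path)
            if broker.is_deleted():
                return None
            info = broker.get_info()
            return '"%s",%d\n' % (info['account'], info['bytes_used'])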
@@ -1,581 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ConfigParser import ConfigParser
import zlib
import time
import datetime
import cStringIO
import collections
from paste.deploy import appconfig
import multiprocessing
import Queue
import cPickle
import hashlib

from swift.common.internal_proxy import InternalProxy
from swift.common.exceptions import ChunkReadTimeout
from swift.common.utils import get_logger, readconf, TRUE_VALUES
from swift.common.daemon import Daemon

now = datetime.datetime.now


class BadFileDownload(Exception):

    def __init__(self, status_code=None):
        self.status_code = status_code


class LogProcessor(object):
    """Load plugins, process logs"""

    def __init__(self, conf, logger):
        if isinstance(logger, tuple):
            self.logger = get_logger(*logger, log_route='log-processor')
        else:
            self.logger = logger

        self.conf = conf
        self._internal_proxy = None

        # load the processing plugins
        self.plugins = {}
        plugin_prefix = 'log-processor-'
        for section in (x for x in conf if x.startswith(plugin_prefix)):
            plugin_name = section[len(plugin_prefix):]
            plugin_conf = conf.get(section, {})
            if plugin_conf.get('processable', 'true').lower() not in \
                    TRUE_VALUES:
                continue
            self.plugins[plugin_name] = plugin_conf
            class_path = self.plugins[plugin_name]['class_path']
            import_target, class_name = class_path.rsplit('.', 1)
            module = __import__(import_target, fromlist=[import_target])
            klass = getattr(module, class_name)
            self.plugins[plugin_name]['instance'] = klass(plugin_conf)
            self.logger.debug(_('Loaded plugin "%s"') % plugin_name)

    @property
    def internal_proxy(self):
        if self._internal_proxy is None:
            stats_conf = self.conf.get('log-processor', {})
            proxy_server_conf_loc = stats_conf.get('proxy_server_conf',
                                            '/etc/swift/proxy-server.conf')
            proxy_server_conf = appconfig(
                                        'config:%s' % proxy_server_conf_loc,
                                        name='proxy-server')
            self._internal_proxy = InternalProxy(proxy_server_conf,
                                                 self.logger,
                                                 retries=3)
        return self._internal_proxy

    def process_one_file(self, plugin_name, account, container, object_name):
        self.logger.info(_('Processing %(obj)s with plugin "%(plugin)s"') %
                    {'obj': '/'.join((account, container, object_name)),
                     'plugin': plugin_name})
        # get an iter of the object data
        compressed = object_name.endswith('.gz')
        stream = self.get_object_data(account, container, object_name,
                                      compressed=compressed)
        # look up the correct plugin and send the stream to it
        return self.plugins[plugin_name]['instance'].process(stream,
                                                             account,
                                                             container,
                                                             object_name)

    def get_data_list(self, start_date=None, end_date=None,
                      listing_filter=None):
        if listing_filter is None:
            listing_filter = set()
        total_list = []
        for plugin_name, data in self.plugins.items():
            account = data['swift_account']
            container = data['container_name']
            listing = self.get_container_listing(account,
                                                 container,
                                                 start_date,
                                                 end_date)
            for object_name in listing:
                # The items in this list end up being passed as positional
                # parameters to process_one_file.
                x = (plugin_name, account, container, object_name)
                if x not in listing_filter:
                    total_list.append(x)
        return total_list

    def get_container_listing(self, swift_account, container_name,
                              start_date=None, end_date=None,
                              listing_filter=None):
        '''
        Get a container listing, filtered by start_date, end_date, and
        listing_filter. Dates, if given, must be in YYYYMMDDHH format
        '''
        search_key = None
        if start_date is not None:
            try:
                parsed_date = time.strptime(start_date, '%Y%m%d%H')
            except ValueError:
                pass
            else:
                year = '%04d' % parsed_date.tm_year
                month = '%02d' % parsed_date.tm_mon
                day = '%02d' % parsed_date.tm_mday
                hour = '%02d' % parsed_date.tm_hour
                search_key = '/'.join([year, month, day, hour])
        end_key = None
        if end_date is not None:
            try:
                parsed_date = time.strptime(end_date, '%Y%m%d%H')
            except ValueError:
                pass
            else:
                year = '%04d' % parsed_date.tm_year
                month = '%02d' % parsed_date.tm_mon
                day = '%02d' % parsed_date.tm_mday
                # Since the end_marker filters by <, we need to add something
                # to make sure we get all the data under the last hour. Adding
                # one to the hour should be all-inclusive.
                hour = '%02d' % (parsed_date.tm_hour + 1)
                end_key = '/'.join([year, month, day, hour])
        container_listing = self.internal_proxy.get_container_list(
                                    swift_account,
                                    container_name,
                                    marker=search_key,
                                    end_marker=end_key)
        results = []
        if listing_filter is None:
            listing_filter = set()
        for item in container_listing:
            name = item['name']
            if name not in listing_filter:
                results.append(name)
        return results

    def get_object_data(self, swift_account, container_name, object_name,
                        compressed=False):
        '''reads an object and yields its lines'''
        code, o = self.internal_proxy.get_object(swift_account, container_name,
                                                 object_name)
        if code < 200 or code >= 300:
            raise BadFileDownload(code)
        last_part = ''
        last_compressed_part = ''
        # magic in the following zlib.decompressobj argument is courtesy of
        # Python decompressing gzip chunk-by-chunk
        # http://stackoverflow.com/questions/2423866
        d = zlib.decompressobj(16 + zlib.MAX_WBITS)
        try:
            for chunk in o:
                if compressed:
                    try:
                        chunk = d.decompress(chunk)
                    except zlib.error:
                        self.logger.debug(_('Bad compressed data for %s')
                            % '/'.join((swift_account, container_name,
                                        object_name)))
                        raise BadFileDownload()  # bad compressed data
                parts = chunk.split('\n')
                parts[0] = last_part + parts[0]
                for part in parts[:-1]:
                    yield part
                last_part = parts[-1]
            if last_part:
                yield last_part
        except ChunkReadTimeout:
            raise BadFileDownload()

    def generate_keylist_mapping(self):
        keylist = {}
        for plugin in self.plugins:
            plugin_keylist = self.plugins[plugin]['instance'].keylist_mapping()
            if not plugin_keylist:
                continue
            for k, v in plugin_keylist.items():
                o = keylist.get(k)
                if o:
                    if not isinstance(o, set):
                        # wrap a single row key in a set before merging, so
                        # a tuple key is not exploded into its elements
                        o = set([o])
                    if isinstance(v, set):
                        o.update(v)
                    else:
                        o.update([v])
                else:
                    o = v
                keylist[k] = o
        return keylist
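

# For illustration only (not part of the original module): a standalone
# sketch of the chunk-by-chunk gzip decompression used in get_object_data
# above, reading from a local file instead of a swift object. The
# 16 + zlib.MAX_WBITS argument tells zlib to expect a gzip header/trailer.
def _example_stream_gunzip(path, chunk_size=8192):
    d = zlib.decompressobj(16 + zlib.MAX_WBITS)
    with open(path, 'rb') as f:
        chunk = f.read(chunk_size)
        while chunk:
            yield d.decompress(chunk)
            chunk = f.read(chunk_size)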


class LogProcessorDaemon(Daemon):
    """
    Gather raw log data and farm out processing to generate a csv that is
    uploaded to swift.
    """

    def __init__(self, conf):
        c = conf.get('log-processor')
        super(LogProcessorDaemon, self).__init__(c)
        self.total_conf = conf
        self.logger = get_logger(c, log_route='log-processor')
        self.log_processor = LogProcessor(conf, self.logger)
        self.lookback_hours = int(c.get('lookback_hours', '120'))
        self.lookback_window = int(c.get('lookback_window',
                                   str(self.lookback_hours)))
        self.log_processor_account = c['swift_account']
        self.log_processor_container = c.get('container_name',
                                             'log_processing_data')
        self.worker_count = int(c.get('worker_count', '1'))
        self._keylist_mapping = None
        self.processed_files_filename = 'processed_files.pickle.gz'

    def get_lookback_interval(self):
        """
        :returns: lookback_start, lookback_end.

            Both or just lookback_end can be None. Otherwise, returns strings
            of the form 'YYYYMMDDHH'. The interval returned is used as bounds
            when looking for logs to process.

            A returned None means don't limit the log files examined on that
            side of the interval.
        """

        if self.lookback_hours == 0:
            lookback_start = None
            lookback_end = None
        else:
            delta_hours = datetime.timedelta(hours=self.lookback_hours)
            lookback_start = now() - delta_hours
            lookback_start = lookback_start.strftime('%Y%m%d%H')
            if self.lookback_window == 0:
                lookback_end = None
            else:
                delta_window = datetime.timedelta(hours=self.lookback_window)
                lookback_end = now() - \
                               delta_hours + \
                               delta_window
                lookback_end = lookback_end.strftime('%Y%m%d%H')
        return lookback_start, lookback_end

    def get_processed_files_list(self):
        """
        :returns: a set of files that have already been processed, or None
            on error.

            Downloads the set from the stats account. Creates an empty set if
            an existing file cannot be found.
        """
        try:
            # Note: this file (or data set) will grow without bound.
            # In practice, if it becomes a problem (say, after many months of
            # running), one could manually prune the file to remove older
            # entries. Automatically pruning on each run could be dangerous.
            # There is not a good way to determine when an old entry should be
            # pruned (lookback_hours could be set to anything and could
            # change).
            stream = self.log_processor.get_object_data(
                         self.log_processor_account,
                         self.log_processor_container,
                         self.processed_files_filename,
                         compressed=True)
            buf = '\n'.join(x for x in stream)
            if buf:
                files = cPickle.loads(buf)
            else:
                return None
        except BadFileDownload, err:
            if err.status_code == 404:
                files = set()
            else:
                return None
        return files

    def get_aggregate_data(self, processed_files, input_data):
        """
        Aggregates stats data by account/hour, summing as needed.

        :param processed_files: set of processed files
        :param input_data: the output from multiprocess_collate/the plugins.

        :returns: A dict containing data aggregated from the input_data
            passed in.

            The dict returned has tuple keys of the form:
                (account, year, month, day, hour)

            The dict returned has values that are dicts with items of this
            form:
                key: field_value
                - key corresponds to something in one of the plugin's keylist
                  mappings, something like the tuple
                  (source, level, verb, code)
                - field_value is the sum of the field_values for the
                  corresponding values in the input

            Both input_data and the dict returned are hourly aggregations of
            stats.

            Multiple values for the same (account, hour, tuple key) found in
            input_data are summed in the dict returned.
        """

        aggr_data = {}
        for item, data in input_data:
            # since item contains the plugin and the log name, new plugins will
            # "reprocess" the file and the results will be in the final csv.
            processed_files.add(item)
            for k, d in data.items():
                existing_data = aggr_data.get(k, {})
                for i, j in d.items():
                    current = existing_data.get(i, 0)
                    # merging strategy for key collisions is addition
                    # processing plugins need to realize this
                    existing_data[i] = current + j
                aggr_data[k] = existing_data
        return aggr_data

    def get_final_info(self, aggr_data):
        """
        Aggregates data from aggr_data based on the keylist mapping.

        :param aggr_data: The results of the get_aggregate_data function.
        :returns: a dict of further aggregated data

            The dict returned has keys of the form:
                (account, year, month, day, hour)

            The dict returned has values that are dicts with items of this
            form:
                'field_name': field_value (int)

            Data is aggregated as specified by the keylist mapping. The
            keylist mapping specifies which keys to combine in aggr_data
            and the final field_names for these combined keys in the dict
            returned. Fields combined are summed.
        """

        final_info = collections.defaultdict(dict)
        for account, data in aggr_data.items():
            for key, mapping in self.keylist_mapping.items():
                if isinstance(mapping, (list, set)):
                    value = 0
                    for k in mapping:
                        try:
                            value += data[k]
                        except KeyError:
                            pass
                else:
                    try:
                        value = data[mapping]
                    except KeyError:
                        value = 0
                final_info[account][key] = value
        return final_info

    def store_processed_files_list(self, processed_files):
        """
        Stores the processed files list in the stats account.

        :param processed_files: set of processed files
        """

        s = cPickle.dumps(processed_files, cPickle.HIGHEST_PROTOCOL)
        f = cStringIO.StringIO(s)
        self.log_processor.internal_proxy.upload_file(f,
            self.log_processor_account,
            self.log_processor_container,
            self.processed_files_filename)

    def get_output(self, final_info):
        """
        :returns: a list of rows to appear in the csv file.

            The first row contains the column headers for the rest of the
            rows in the returned list.

            Each row after the first row corresponds to an account's data
            for that hour.
        """

        sorted_keylist_mapping = sorted(self.keylist_mapping)
        columns = ['data_ts', 'account'] + sorted_keylist_mapping
        output = [columns]
        for (account, year, month, day, hour), d in final_info.items():
            data_ts = '%04d/%02d/%02d %02d:00:00' % \
                (int(year), int(month), int(day), int(hour))
            row = [data_ts, '%s' % account]
            for k in sorted_keylist_mapping:
                row.append(str(d[k]))
            output.append(row)
        return output

    def store_output(self, output):
        """
        Takes a list of rows and stores a csv file of the values in the
        stats account.

        :param output: list of rows to appear in the csv file

            This csv file is the final product of this script.
        """

        out_buf = '\n'.join([','.join(row) for row in output])
        h = hashlib.md5(out_buf).hexdigest()
        upload_name = time.strftime('%Y/%m/%d/%H/') + '%s.csv.gz' % h
        f = cStringIO.StringIO(out_buf)
        self.log_processor.internal_proxy.upload_file(f,
            self.log_processor_account,
            self.log_processor_container,
            upload_name)

    @property
    def keylist_mapping(self):
        """
        :returns: the keylist mapping.

            The keylist mapping determines how the stats fields are aggregated
            in the final aggregation step.
        """

        if self._keylist_mapping is None:
            self._keylist_mapping = \
                self.log_processor.generate_keylist_mapping()
        return self._keylist_mapping

    def process_logs(self, logs_to_process, processed_files):
        """
        :param logs_to_process: list of logs to process
        :param processed_files: set of processed files

        :returns: a list of rows of processed data.

            The first row is the column headers. The rest of the rows contain
            hourly aggregate data for the account specified in the row.

            Files processed are added to the processed_files set.

            When a large data structure is no longer needed, it is deleted in
            an effort to conserve memory.
        """

        # map
        processor_args = (self.total_conf, self.logger)
        results = multiprocess_collate(processor_args, logs_to_process,
            self.worker_count)

        # reduce
        aggr_data = self.get_aggregate_data(processed_files, results)
        del results

        # group
        # reduce a large number of keys in aggr_data[k] to a small
        # number of output keys
        final_info = self.get_final_info(aggr_data)
        del aggr_data

        # output
        return self.get_output(final_info)

    def run_once(self, *args, **kwargs):
        """
        Process log files that fall within the lookback interval.

        Upload resulting csv file to stats account.

        Update processed files list and upload to stats account.
        """

        for k in 'lookback_hours lookback_window'.split():
            if k in kwargs and kwargs[k] is not None:
                setattr(self, k, kwargs[k])

        start = time.time()
        self.logger.info(_("Beginning log processing"))

        lookback_start, lookback_end = self.get_lookback_interval()
        self.logger.debug('lookback_start: %s' % lookback_start)
        self.logger.debug('lookback_end: %s' % lookback_end)

        processed_files = self.get_processed_files_list()
        if processed_files is None:
            self.logger.error(_('Log processing unable to load list of '
                'already processed log files'))
            return
        self.logger.debug(_('found %d processed files') %
            len(processed_files))

        logs_to_process = self.log_processor.get_data_list(lookback_start,
            lookback_end, processed_files)
        self.logger.info(_('loaded %d files to process') %
            len(logs_to_process))

        if logs_to_process:
            output = self.process_logs(logs_to_process, processed_files)
            self.store_output(output)
            del output

            self.store_processed_files_list(processed_files)

        self.logger.info(_("Log processing done (%0.2f minutes)") %
            ((time.time() - start) / 60))
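

# An illustrative driver (not part of the original module) wiring the
# daemon's steps together the same way run_once does; error handling and
# logging are omitted for brevity.
def _example_process_once(conf):
    daemon = LogProcessorDaemon(conf)
    lookback_start, lookback_end = daemon.get_lookback_interval()
    processed_files = daemon.get_processed_files_list()
    if processed_files is None:
        # run_once treats a failed download of the processed list as fatal
        return
    logs_to_process = daemon.log_processor.get_data_list(
        lookback_start, lookback_end, processed_files)
    if logs_to_process:
        output = daemon.process_logs(logs_to_process, processed_files)
        daemon.store_output(output)
        daemon.store_processed_files_list(processed_files)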


def multiprocess_collate(processor_args, logs_to_process, worker_count):
    '''
    yield hourly data from logs_to_process
    Every item that this function yields will be added to the processed files
    list.
    '''
    workers = []
    in_queue = multiprocessing.Queue()
    out_queue = multiprocessing.Queue()
    for _junk in range(worker_count):
        p = multiprocessing.Process(target=collate_worker,
                                    args=(processor_args,
                                          in_queue,
                                          out_queue))
        p.start()
        workers.append(p)
    for x in logs_to_process:
        in_queue.put(x)
    for _junk in range(worker_count):
        in_queue.put(None)  # tell the worker to end
    while True:
        try:
            item, data = out_queue.get_nowait()
        except Queue.Empty:
            time.sleep(.01)
        else:
            if not isinstance(data, Exception):
                yield item, data
        if not any(w.is_alive() for w in workers) and out_queue.empty():
            # all the workers are done and nothing is in the queue
            break


def collate_worker(processor_args, in_queue, out_queue):
    '''worker process for multiprocess_collate'''
    p = LogProcessor(*processor_args)
    while True:
        item = in_queue.get()
        if item is None:
            # no more work to process
            break
        try:
            ret = p.process_one_file(*item)
        except Exception, err:
            item_string = '/'.join(item[1:])
            p.logger.exception("Unable to process file '%s'" % (item_string))
            ret = err
        out_queue.put((item, ret))
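For context, a sketch of how the collate generator is consumed (the worker
count here is illustrative; process_logs above does the real consumption, and
get_aggregate_data records each yielded item as processed):

    processor_args = (total_conf, logger)
    results = multiprocess_collate(processor_args, logs_to_process, 4)
    for item, hourly_data in results:
        # item is a (plugin, account, container, object) tuple; hourly_data
        # maps (account, year, month, day, hour) to a per-hour stats dict
        pass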
@@ -1,188 +0,0 @@
 | 
			
		||||
# Copyright (c) 2010-2011 OpenStack, LLC.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 | 
			
		||||
# implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
 | 
			
		||||
from __future__ import with_statement
 | 
			
		||||
import os
 | 
			
		||||
import hashlib
 | 
			
		||||
import time
 | 
			
		||||
import gzip
 | 
			
		||||
import re
 | 
			
		||||
import sys
 | 
			
		||||
from paste.deploy import appconfig
 | 
			
		||||
 | 
			
		||||
from swift.common.internal_proxy import InternalProxy
 | 
			
		||||
from swift.common.daemon import Daemon
 | 
			
		||||
from swift.common import utils
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LogUploader(Daemon):
 | 
			
		||||
    '''
 | 
			
		||||
    Given a local directory, a swift account, and a container name, LogParser
 | 
			
		||||
    will upload all files in the local directory to the given account/
 | 
			
		||||
    container.  All but the newest files will be uploaded, and the files' md5
 | 
			
		||||
    sum will be computed. The hash is used to prevent duplicate data from
 | 
			
		||||
    being uploaded multiple times in different files (ex: log lines). Since
 | 
			
		||||
    the hash is computed, it is also used as the uploaded object's etag to
 | 
			
		||||
    ensure data integrity.
 | 
			
		||||
 | 
			
		||||
    Note that after the file is successfully uploaded, it will be unlinked.
 | 
			
		||||
 | 
			
		||||
    The given proxy server config is used to instantiate a proxy server for
 | 
			
		||||
    the object uploads.
 | 
			
		||||
 | 
			
		||||
    The default log file format is: plugin_name-%Y%m%d%H* . Any other format
 | 
			
		||||
    of log file names must supply a regular expression that defines groups
 | 
			
		||||
    for year, month, day, and hour. The regular expression will be evaluated
 | 
			
		||||
    with re.VERBOSE. A common example may be:
 | 
			
		||||
    source_filename_pattern = ^cdn_logger-
 | 
			
		||||
        (?P<year>[0-9]{4})
 | 
			
		||||
        (?P<month>[0-1][0-9])
 | 
			
		||||
        (?P<day>[0-3][0-9])
 | 
			
		||||
        (?P<hour>[0-2][0-9])
 | 
			
		||||
        .*$
 | 
			
		||||
    '''
 | 
			
		||||
 | 
			
		||||
    def __init__(self, uploader_conf, plugin_name, regex=None, cutoff=None):
 | 
			
		||||
        super(LogUploader, self).__init__(uploader_conf)
 | 
			
		||||
        log_name = '%s-log-uploader' % plugin_name
 | 
			
		||||
        self.logger = utils.get_logger(uploader_conf, log_name,
 | 
			
		||||
                                       log_route=plugin_name)
 | 
			
		||||
        self.log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
 | 
			
		||||
        self.swift_account = uploader_conf['swift_account']
 | 
			
		||||
        self.container_name = uploader_conf['container_name']
 | 
			
		||||
        proxy_server_conf_loc = uploader_conf.get('proxy_server_conf',
 | 
			
		||||
                                            '/etc/swift/proxy-server.conf')
 | 
			
		||||
        proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
 | 
			
		||||
                                      name='proxy-server')
 | 
			
		||||
        self.internal_proxy = InternalProxy(proxy_server_conf)
 | 
			
		||||
        self.new_log_cutoff = int(cutoff or
 | 
			
		||||
                                  uploader_conf.get('new_log_cutoff', '7200'))
 | 
			
		||||
        self.unlink_log = uploader_conf.get('unlink_log', 'true').lower() in \
 | 
			
		||||
                utils.TRUE_VALUES
 | 
			
		||||
        self.filename_pattern = regex or \
 | 
			
		||||
            uploader_conf.get('source_filename_pattern',
 | 
			
		||||
                '''
 | 
			
		||||
                ^%s-
 | 
			
		||||
                (?P<year>[0-9]{4})
 | 
			
		||||
                (?P<month>[0-1][0-9])
 | 
			
		||||
                (?P<day>[0-3][0-9])
 | 
			
		||||
                (?P<hour>[0-2][0-9])
 | 
			
		||||
                .*$''' % plugin_name)
 | 
			
		||||
 | 
			
		||||
    def run_once(self, *args, **kwargs):
        self.logger.info(_("Uploading logs"))
        start = time.time()
        self.upload_all_logs()
        self.logger.info(_("Uploading logs complete (%0.2f minutes)") %
            ((time.time() - start) / 60))

    def get_relpath_to_files_under_log_dir(self):
        """
        Look under log_dir recursively and return all filenames as relpaths

        :returns : list of strs, the relpath to all filenames under log_dir
        """
        all_files = []
        for path, dirs, files in os.walk(self.log_dir):
            all_files.extend(os.path.join(path, f) for f in files)
        return [os.path.relpath(f, start=self.log_dir) for f in all_files]

    def filter_files(self, all_files):
        """
        Filter files based on the regex in self.filename_pattern

        :param all_files: list of strs, relpath of the filenames under log_dir

        :returns : dict mapping full path of file to match group dict
        """
        filename2match = {}
        found_match = False
        for filename in all_files:
            match = re.match(self.filename_pattern, filename, re.VERBOSE)
            if match:
                found_match = True
                full_path = os.path.join(self.log_dir, filename)
                filename2match[full_path] = match.groupdict()
            else:
                self.logger.debug(_('%(filename)s does not match '
                           '%(pattern)s') % {'filename': filename,
                                             'pattern': self.filename_pattern})
        return filename2match

    def upload_all_logs(self):
        """
        Match files under log_dir to source_filename_pattern and upload to
        swift
        """
        all_files = self.get_relpath_to_files_under_log_dir()
        filename2match = self.filter_files(all_files)
        if not filename2match:
            self.logger.error(_('No files in %(log_dir)s match %(pattern)s') %
                     {'log_dir': self.log_dir,
                      'pattern': self.filename_pattern})
            sys.exit(1)
        if not self.internal_proxy.create_container(self.swift_account,
                                                    self.container_name):
            self.logger.error(_('Unable to create container for '
                                '%(account)s/%(container)s') % {
                                    'account': self.swift_account,
                                    'container': self.container_name})
            return
        for filename, match in filename2match.items():
            # don't process very new logs
            seconds_since_mtime = time.time() - os.stat(filename).st_mtime
            if seconds_since_mtime < self.new_log_cutoff:
                self.logger.debug(_("Skipping log: %(file)s "
                                    "(< %(cutoff)d seconds old)") % {
                                        'file': filename,
                                        'cutoff': self.new_log_cutoff})
                continue
            self.upload_one_log(filename, **match)

    def upload_one_log(self, filename, year, month, day, hour):
        """
        Upload one file to swift
        """
        if os.path.getsize(filename) == 0:
            self.logger.debug(_("Log %s is 0 length, skipping") % filename)
            return
        self.logger.debug(_("Processing log: %s") % filename)
        filehash = hashlib.md5()
        already_compressed = True if filename.endswith('.gz') else False
        opener = gzip.open if already_compressed else open
        f = opener(filename, 'rb')
        try:
            for line in f:
                # filter out bad lines here?
                filehash.update(line)
        finally:
            f.close()
        filehash = filehash.hexdigest()
        # By adding a hash to the filename, we ensure that uploaded files
        # have unique filenames and protect against uploading one file
        # more than one time. By using md5, we get an etag for free.
        target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
        if self.internal_proxy.upload_file(filename,
                                          self.swift_account,
                                          self.container_name,
                                          target_filename,
                                          compress=(not already_compressed)):
            self.logger.debug(_("Uploaded log %(file)s to %(target)s") %
                {'file': filename, 'target': target_filename})
            if self.unlink_log:
                os.unlink(filename)
        else:
            self.logger.error(_("ERROR: Upload of log %s failed!") % filename)
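
# A minimal sketch of how filter_files() and upload_one_log() above fit
# together, assuming a hypothetical source_filename_pattern (the real
# pattern comes from the uploader's conf and is deployment-specific):
import re

pattern = r'''
    access-
    (?P<year>[0-9]{4})
    (?P<month>[0-1][0-9])
    (?P<day>[0-3][0-9])
    (?P<hour>[0-2][0-9])
    .*'''
match = re.match(pattern, 'access-2011013104.log.gz', re.VERBOSE)
# filter_files() stores match.groupdict(), which upload_one_log()
# receives as keyword arguments and turns into the object name
# '2011/01/31/04/<md5 of the log contents>.gz'.
assert match.groupdict() == {'year': '2011', 'month': '01',
                             'day': '31', 'hour': '04'}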
@@ -1,69 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from swift.common.utils import get_logger


class StatsLogProcessor(object):
    """Transform account storage stat logs"""

    def __init__(self, conf):
        self.logger = get_logger(conf, log_route='stats-processor')

    def process(self, obj_stream, data_object_account, data_object_container,
                data_object_name):
        '''generate hourly groupings of data from one stats log file'''
        account_totals = {}
        year, month, day, hour, _junk = data_object_name.split('/')
        for line in obj_stream:
            if not line:
                continue
            try:
                (account,
                container_count,
                object_count,
                bytes_used) = line.split(',')
            except (IndexError, ValueError):
                # bad line data
                self.logger.debug(_('Bad line data: %s') % repr(line))
                continue
            account = account.strip('"')
            container_count = int(container_count.strip('"'))
            object_count = int(object_count.strip('"'))
            bytes_used = int(bytes_used.strip('"'))
            aggr_key = (account, year, month, day, hour)
            d = account_totals.get(aggr_key, {})
            d['replica_count'] = d.setdefault('replica_count', 0) + 1
            d['container_count'] = d.setdefault('container_count', 0) + \
                                   container_count
            d['object_count'] = d.setdefault('object_count', 0) + \
                                object_count
            d['bytes_used'] = d.setdefault('bytes_used', 0) + \
                              bytes_used
            account_totals[aggr_key] = d
        return account_totals

    def keylist_mapping(self):
        '''
        returns a dictionary of final keys mapped to source keys
        '''
        keylist_mapping = {
        #   <db key> : <row key> or <set of row keys>
            'bytes_used': 'bytes_used',
            'container_count': 'container_count',
            'object_count': 'object_count',
            'replica_count': 'replica_count',
        }
        return keylist_mapping
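
# A small worked example of process() above, assuming two replicas of
# the same account database reported in one stats log (the CSV format
# matches what the account-stats collector writes):
#
#     obj_stream = ['"AUTH_test",2,12,1024',
#                   '"AUTH_test",2,12,1024']
#
# With data_object_name = '2011/01/31/04/abcdef.gz', both lines are
# aggregated under the key ('AUTH_test', '2011', '01', '31', '04') as
# {'replica_count': 2, 'container_count': 4, 'object_count': 24,
#  'bytes_used': 2048}.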
@@ -2,6 +2,7 @@

import unittest
from nose import SkipTest
from uuid import uuid4

from swift.common.constraints import MAX_META_COUNT, MAX_META_NAME_LENGTH, \
    MAX_META_OVERALL_SIZE, MAX_META_VALUE_LENGTH
@@ -132,6 +133,32 @@ class TestAccount(unittest.TestCase):
        resp.read()
        self.assertEquals(resp.status, 400)

    def test_name_control_chars(self):
        if skip:
            raise SkipTest

        container = uuid4().hex

        def put(url, token, parsed, conn):
            conn.request('PUT', '%s/%s%%01test' %
                (parsed.path, container), '',
                {'X-Auth-Token': token, 'Content-Length': '0'})
            return check_response(conn)

        resp = retry(put)
        resp.read()
        self.assertTrue(resp.status in (201, 202))

        def get(url, token, parsed, conn):
            conn.request('GET', '%s?format=xml' % (parsed.path,), '',
                {'X-Auth-Token': token})
            return check_response(conn)

        resp = retry(get)
        body = resp.read()
        self.assertEquals(resp.status, 200)
        self.assertTrue('<name>%s\x01test</name>' % (container,) in body)


if __name__ == '__main__':
    unittest.main()

@@ -522,6 +522,50 @@ class TestContainer(unittest.TestCase):
        resp.read()
        self.assertEquals(resp.status, 201)

    def test_name_control_chars(self):
        if skip:
            raise SkipTest

        def put(url, token, parsed, conn):
            conn.request('PUT', '%s/%s%%00test' % (parsed.path, self.name), '',
                {'X-Auth-Token': token})
            return check_response(conn)

        resp = retry(put)
        resp.read()
        # NULLs not allowed
        self.assertEquals(resp.status, 412)

        def put(url, token, parsed, conn):
            conn.request('PUT', '%s/%s%%01test' % (parsed.path, self.name), '',
                {'X-Auth-Token': token})
            return check_response(conn)

        resp = retry(put)
        resp.read()
        # 0x01 allowed
        self.assertTrue(resp.status in (201, 202))

        def put(url, token, parsed, conn):
            conn.request('PUT', '%s/%s/object%%01test' %
                (parsed.path, self.name), '',
                {'X-Auth-Token': token, 'Content-Length': '0'})
            return check_response(conn)

        resp = retry(put)
        resp.read()
        self.assertTrue(resp.status in (201, 202))

        def get(url, token, parsed, conn):
            conn.request('GET', '%s/%s?format=xml' % (parsed.path, self.name),
                '', {'X-Auth-Token': token})
            return check_response(conn)

        resp = retry(get)
        body = resp.read()
        self.assertEquals(resp.status, 200)
        self.assertTrue('<name>object\x01test</name>' in body)


if __name__ == '__main__':
    unittest.main()

@@ -541,6 +541,30 @@ class TestObject(unittest.TestCase):
        resp.read()
        self.assertEquals(resp.status, 204)

    def test_name_control_chars(self):
        if skip:
            raise SkipTest

        def put(url, token, parsed, conn):
            conn.request('PUT', '%s/%s/obj%%00test' % (parsed.path,
                self.container), 'test', {'X-Auth-Token': token})
            return check_response(conn)

        resp = retry(put)
        resp.read()
        # NULLs not allowed
        self.assertEquals(resp.status, 412)

        def put(url, token, parsed, conn):
            conn.request('PUT', '%s/%s/obj%%01test' % (parsed.path,
                self.container), 'test', {'X-Auth-Token': token})
            return check_response(conn)

        resp = retry(put)
        resp.read()
        # 0x01 allowed
        self.assertEquals(resp.status, 201)


if __name__ == '__main__':
    unittest.main()

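
# The three test_name_control_chars tests above all probe the same
# server-side rule; a conceptual sketch (the real check lives in the
# account/container/object servers, not in a helper like this):
def name_is_rejected(name):
    # a NUL in an account/container/object name draws 412 Precondition
    # Failed; other control characters such as \x01 are still accepted
    return '\x00' in name

assert name_is_rejected('c\x00test')
assert not name_is_rejected('c\x01test')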
@@ -22,6 +22,7 @@ from StringIO import StringIO
import simplejson
import xml.dom.minidom
from webob import Request
from xml.parsers.expat import ExpatError

from swift.account.server import AccountController, ACCOUNT_LISTING_LIMIT
from swift.common.utils import normalize_timestamp
@@ -450,7 +451,8 @@ class TestAccountController(unittest.TestCase):
                                     'X-Bytes-Used': '0',
                                     'X-Timestamp': normalize_timestamp(0)})
        self.controller.PUT(req)
        req = Request.blank('/sda1/p/a/c2', environ={'REQUEST_METHOD': 'PUT'},
        req = Request.blank('/sda1/p/a/c2%04',
                            environ={'REQUEST_METHOD': 'PUT'},
                            headers={'X-Put-Timestamp': '2',
                                     'X-Delete-Timestamp': '0',
                                     'X-Object-Count': '0',
@@ -462,7 +464,15 @@ class TestAccountController(unittest.TestCase):
        resp = self.controller.GET(req)
        self.assertEquals(resp.content_type, 'application/xml')
        self.assertEquals(resp.status_int, 200)
        dom = xml.dom.minidom.parseString(resp.body)
        try:
            dom = xml.dom.minidom.parseString(resp.body)
        except ExpatError, err:
            # Expat doesn't like control characters, which are XML 1.1
            # compatible. Soooo, we have to replace them. We'll do a specific
            # replace in this case, but real code that uses Expat will need
            # something more resilient.
            dom = xml.dom.minidom.parseString(
                resp.body.replace('\x04', '\\x04'))
        self.assertEquals(dom.firstChild.nodeName, 'account')
        listing = \
            [n for n in dom.firstChild.childNodes if n.nodeName != '#text']
@@ -483,7 +493,7 @@ class TestAccountController(unittest.TestCase):
        self.assertEquals(sorted([n.nodeName for n in container]),
                          ['bytes', 'count', 'name'])
        node = [n for n in container if n.nodeName == 'name'][0]
        self.assertEquals(node.firstChild.nodeValue, 'c2')
        self.assertEquals(node.firstChild.nodeValue, 'c2\\x04')
        node = [n for n in container if n.nodeName == 'count'][0]
        self.assertEquals(node.firstChild.nodeValue, '0')
        node = [n for n in container if n.nodeName == 'bytes'][0]
@@ -495,7 +505,8 @@ class TestAccountController(unittest.TestCase):
                                     'X-Bytes-Used': '2',
                                     'X-Timestamp': normalize_timestamp(0)})
        self.controller.PUT(req)
        req = Request.blank('/sda1/p/a/c2', environ={'REQUEST_METHOD': 'PUT'},
        req = Request.blank('/sda1/p/a/c2%04',
                            environ={'REQUEST_METHOD': 'PUT'},
                            headers={'X-Put-Timestamp': '2',
                                     'X-Delete-Timestamp': '0',
                                     'X-Object-Count': '3',
@@ -506,7 +517,15 @@ class TestAccountController(unittest.TestCase):
                            environ={'REQUEST_METHOD': 'GET'})
        resp = self.controller.GET(req)
        self.assertEquals(resp.status_int, 200)
        dom = xml.dom.minidom.parseString(resp.body)
        try:
            dom = xml.dom.minidom.parseString(resp.body)
        except ExpatError, err:
            # Expat doesn't like control characters, which are XML 1.1
            # compatible. Soooo, we have to replace them. We'll do a specific
            # replace in this case, but real code that uses Expat will need
            # something more resilient.
            dom = xml.dom.minidom.parseString(
                resp.body.replace('\x04', '\\x04'))
        self.assertEquals(dom.firstChild.nodeName, 'account')
        listing = \
            [n for n in dom.firstChild.childNodes if n.nodeName != '#text']
@@ -526,7 +545,7 @@ class TestAccountController(unittest.TestCase):
        self.assertEquals(sorted([n.nodeName for n in container]),
                          ['bytes', 'count', 'name'])
        node = [n for n in container if n.nodeName == 'name'][0]
        self.assertEquals(node.firstChild.nodeValue, 'c2')
        self.assertEquals(node.firstChild.nodeValue, 'c2\\x04')
        node = [n for n in container if n.nodeName == 'count'][0]
        self.assertEquals(node.firstChild.nodeValue, '3')
        node = [n for n in container if n.nodeName == 'bytes'][0]
@@ -959,6 +978,35 @@ class TestAccountController(unittest.TestCase):
            resp = self.controller.GET(req)
            self.assert_(resp.status_int in (204, 412), resp.status_int)

    def test_params_no_null(self):
        self.controller.PUT(Request.blank('/sda1/p/a',
                            headers={'X-Timestamp': normalize_timestamp(1)},
                            environ={'REQUEST_METHOD': 'PUT'}))
        for param in ('delimiter', 'format', 'limit', 'marker',
                      'prefix'):
            req = Request.blank('/sda1/p/a?%s=\x00' % param,
                                environ={'REQUEST_METHOD': 'GET'})
            resp = self.controller.GET(req)
            self.assertEquals(resp.status_int, 400)

    def test_PUT_account_no_null(self):
        req = Request.blank('/sda1/p/test\x00test',
            environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 400)

    def test_PUT_container_no_null(self):
        req = Request.blank('/sda1/p/a',
            environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 201)
        req = Request.blank('/sda1/p/a/test\x00test',
            environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_PUT_TIMESTAMP': '1',
                     'HTTP_X_DELETE_TIMESTAMP': '0',
                     'HTTP_X_OBJECT_COUNT': '0', 'HTTP_X_BYTES_USED': '0'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 400)


if __name__ == '__main__':
    unittest.main()

@@ -542,5 +542,23 @@ class TestAuth(unittest.TestCase):
        self.assertEquals(resp.status_int, 204)


class TestParseUserCreation(unittest.TestCase):
    def test_parse_user_creation(self):
        auth_filter = auth.filter_factory({
            'user_test_tester3': 'testing',
            'user_admin_admin': 'admin .admin .reseller_admin',
        })(FakeApp())
        self.assertEquals(auth_filter.users, {
            'admin:admin': {
                'url': 'http://127.0.0.1:8080/v1/AUTH_admin',
                'groups': ['.admin', '.reseller_admin'],
                'key': 'admin'
            }, 'test:tester3': {
                'url': 'http://127.0.0.1:8080/v1/AUTH_test',
                'groups': [],
                'key': 'testing'
            },
        })

if __name__ == '__main__':
    unittest.main()

@@ -1,34 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""" Tests for swift.common.compressing_file_reader """

import unittest
import cStringIO

from swift.common.compressing_file_reader import CompressingFileReader

class TestCompressingFileReader(unittest.TestCase):

    def test_read(self):
        plain = 'obj\ndata'
        s = cStringIO.StringIO(plain)
        expected = '\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\xcaO\xca\xe2JI,'\
                   'I\x04\x00\x00\x00\xff\xff\x03\x00P(\xa8\x1f\x08\x00\x00'\
                   '\x00'
        x = CompressingFileReader(s)
        compressed = ''.join(iter(lambda: x.read(), ''))
        self.assertEquals(compressed, expected)
        self.assertEquals(x.read(), '')
@@ -1,192 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: Tests

import unittest
import webob
import tempfile
import json

from swift.common import internal_proxy

class DumbBaseApplicationFactory(object):

    def __init__(self, status_codes, body=''):
        self.status_codes = status_codes[:]
        self.body = body

    def __call__(self, *a, **kw):
        app = DumbBaseApplication(*a, **kw)
        app.status_codes = self.status_codes
        try:
            app.default_status_code = self.status_codes[-1]
        except IndexError:
            app.default_status_code = 200
        app.body = self.body
        return app

class DumbBaseApplication(object):

    def __init__(self, *a, **kw):
        self.status_codes = []
        self.default_status_code = 200
        self.call_count = 0
        self.body = ''

    def handle_request(self, req):
        self.call_count += 1
        req.path_info_pop()
        if isinstance(self.body, list):
            try:
                body = self.body.pop(0)
            except IndexError:
                body = ''
        else:
            body = self.body
        resp = webob.Response(request=req, body=body,
                              conditional_response=True)
        try:
            resp.status_int = self.status_codes.pop(0)
        except IndexError:
            resp.status_int = self.default_status_code
        return resp

    def update_request(self, req):
        return req


class TestInternalProxy(unittest.TestCase):

    def test_webob_request_copy(self):
        req = webob.Request.blank('/')
        req2 = internal_proxy.webob_request_copy(req)
        self.assertEquals(req.path, req2.path)
        self.assertEquals(req.path_info, req2.path_info)
        self.assertFalse(req is req2)

    def test_handle_request(self):
        status_codes = [200]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes)
        p = internal_proxy.InternalProxy()
        req = webob.Request.blank('/')
        orig_req = internal_proxy.webob_request_copy(req)
        resp = p._handle_request(req)
        self.assertEquals(req.path_info, orig_req.path_info)

    def test_handle_request_with_retries(self):
        status_codes = [500, 200]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes)
        p = internal_proxy.InternalProxy(retries=3)
        req = webob.Request.blank('/')
        orig_req = internal_proxy.webob_request_copy(req)
        resp = p._handle_request(req)
        self.assertEquals(req.path_info, orig_req.path_info)
        self.assertEquals(p.upload_app.call_count, 2)
        self.assertEquals(resp.status_int, 200)

    def test_get_object(self):
        status_codes = [200]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes)
        p = internal_proxy.InternalProxy()
        code, body = p.get_object('a', 'c', 'o')
        body = ''.join(body)
        self.assertEquals(code, 200)
        self.assertEquals(body, '')

    def test_create_container(self):
        status_codes = [200]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes)
        p = internal_proxy.InternalProxy()
        resp = p.create_container('a', 'c')
        self.assertTrue(resp)

    def test_handle_request_with_retries_all_error(self):
        status_codes = [500, 500, 500, 500, 500]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes)
        p = internal_proxy.InternalProxy(retries=3)
        req = webob.Request.blank('/')
        orig_req = internal_proxy.webob_request_copy(req)
        resp = p._handle_request(req)
        self.assertEquals(req.path_info, orig_req.path_info)
        self.assertEquals(p.upload_app.call_count, 3)
        self.assertEquals(resp.status_int, 500)

    def test_get_container_list_empty(self):
        status_codes = [200]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes, body='[]')
        p = internal_proxy.InternalProxy()
        resp = p.get_container_list('a', 'c')
        self.assertEquals(resp, [])

    def test_get_container_list_no_body(self):
        status_codes = [204]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes, body='')
        p = internal_proxy.InternalProxy()
        resp = p.get_container_list('a', 'c')
        self.assertEquals(resp, [])

    def test_get_container_list_full_listing(self):
        status_codes = [200, 200]
        obj_a = dict(name='foo', hash='foo', bytes=3,
                     content_type='text/plain', last_modified='2011/01/01')
        obj_b = dict(name='bar', hash='bar', bytes=3,
                     content_type='text/plain', last_modified='2011/01/01')
        body = [json.dumps([obj_a]), json.dumps([obj_b]), json.dumps([])]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes, body=body)
        p = internal_proxy.InternalProxy()
        resp = p.get_container_list('a', 'c')
        expected = ['foo', 'bar']
        self.assertEquals([x['name'] for x in resp], expected)

    def test_get_container_list_full(self):
        status_codes = [204]
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes, body='')
        p = internal_proxy.InternalProxy()
        resp = p.get_container_list('a', 'c', marker='a', end_marker='b',
                                    limit=100, prefix='/', delimiter='.')
        self.assertEquals(resp, [])

    def test_upload_file(self):
        status_codes = [200, 200]  # container PUT + object PUT
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes)
        p = internal_proxy.InternalProxy()
        with tempfile.NamedTemporaryFile() as file_obj:
            resp = p.upload_file(file_obj.name, 'a', 'c', 'o')
        self.assertTrue(resp)

    def test_upload_file_with_retries(self):
        status_codes = [200, 500, 200]  # container PUT + error + object PUT
        internal_proxy.BaseApplication = DumbBaseApplicationFactory(
                                            status_codes)
        p = internal_proxy.InternalProxy(retries=3)
        with tempfile.NamedTemporaryFile() as file_obj:
            resp = p.upload_file(file_obj, 'a', 'c', 'o')
        self.assertTrue(resp)
        self.assertEquals(p.upload_app.call_count, 3)


if __name__ == '__main__':
    unittest.main()
@@ -246,6 +246,24 @@ class TestContainerController(unittest.TestCase):
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 201)

    def test_PUT_container_no_null(self):
        req = Request.blank('/sda1/p/a/test\x00test',
            environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 400)

    def test_PUT_object_no_null(self):
        req = Request.blank('/sda1/p/a/test',
            environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 201)
        req = Request.blank('/sda1/p/a/test/test\x00test',
            environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1',
                     'HTTP_X_SIZE': '0', 'HTTP_X_CONTENT_TYPE': 'text/plain',
                     'HTTP_X_ETAG': 'd41d8cd98f00b204e9800998ecf8427e'})
        resp = self.controller.PUT(req)
        self.assertEquals(resp.status_int, 400)

    def test_PUT_account_update(self):
        bindsock = listen(('127.0.0.1', 0))
        def accept(return_code, expected_timestamp):
@@ -582,25 +600,25 @@ class TestContainerController(unittest.TestCase):
        resp = self.controller.PUT(req)
        # fill the container
        for i in range(3):
            req = Request.blank('/sda1/p/a/xmlc/%s'%i, environ=
                    {'REQUEST_METHOD': 'PUT',
                    'HTTP_X_TIMESTAMP': '1',
                    'HTTP_X_CONTENT_TYPE': 'text/plain',
                    'HTTP_X_ETAG': 'x',
                    'HTTP_X_SIZE': 0})
            req = Request.blank('/sda1/p/a/xmlc/%s%%%02x' % (i, i + 1),
                environ={'REQUEST_METHOD': 'PUT',
                         'HTTP_X_TIMESTAMP': '1',
                         'HTTP_X_CONTENT_TYPE': 'text/plain',
                         'HTTP_X_ETAG': 'x',
                         'HTTP_X_SIZE': 0})
            resp = self.controller.PUT(req)
            self.assertEquals(resp.status_int, 201)
        xml_body = '<?xml version="1.0" encoding="UTF-8"?>\n' \
            '<container name="xmlc">' \
                '<object><name>0</name><hash>x</hash><bytes>0</bytes>' \
                '<object><name>0\x01</name><hash>x</hash><bytes>0</bytes>' \
                    '<content_type>text/plain</content_type>' \
                    '<last_modified>1970-01-01T00:00:01' \
                    '</last_modified></object>' \
                '<object><name>1</name><hash>x</hash><bytes>0</bytes>' \
                '<object><name>1\x02</name><hash>x</hash><bytes>0</bytes>' \
                    '<content_type>text/plain</content_type>' \
                    '<last_modified>1970-01-01T00:00:01' \
                    '</last_modified></object>' \
                '<object><name>2</name><hash>x</hash><bytes>0</bytes>' \
                '<object><name>2\x03</name><hash>x</hash><bytes>0</bytes>' \
                    '<content_type>text/plain</content_type>' \
                    '<last_modified>1970-01-01T00:00:01' \
                    '</last_modified></object>' \
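
# For reference: '%s%%%02x' % (i, i + 1) quotes to the paths 0%01, 1%02
# and 2%03, so the stored object names carry the raw control characters
# written as \x01, \x02 and \x03 in the expected xml_body above.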
@@ -822,6 +840,17 @@ class TestContainerController(unittest.TestCase):
            resp = self.controller.GET(req)
            self.assert_(resp.status_int in (204, 412), resp.status_int)

    def test_params_no_null(self):
        self.controller.PUT(Request.blank('/sda1/p/a/c',
                            headers={'X-Timestamp': normalize_timestamp(1)},
                            environ={'REQUEST_METHOD': 'PUT'}))
        for param in ('delimiter', 'format', 'limit', 'marker', 'path',
                      'prefix'):
            req = Request.blank('/sda1/p/a/c?%s=\x00' % param,
                                environ={'REQUEST_METHOD': 'GET'})
            resp = self.controller.GET(req)
            self.assertEquals(resp.status_int, 400)


if __name__ == '__main__':
    unittest.main()

@@ -2045,8 +2045,8 @@ class TestObjectController(unittest.TestCase):
            proxy_server.http_connect = fake_http_connect(201, 201, 201, 201)
            controller = proxy_server.ObjectController(self.app, 'account',
                'container', 'object')
            req = Request.blank('/a/c/o', {}, headers={
                'Transfer-Encoding': 'chunked',
            req = Request.blank('/a/c/o', environ={'REQUEST_METHOD': 'COPY'},
                headers={'Transfer-Encoding': 'chunked',
                'Content-Type': 'foo/bar'})

            req.body_file = ChunkedFile(10)
@@ -2058,8 +2058,8 @@ class TestObjectController(unittest.TestCase):
            # test 413 entity too large
            from swift.proxy import server
            proxy_server.http_connect = fake_http_connect(201, 201, 201, 201)
            req = Request.blank('/a/c/o', {}, headers={
                'Transfer-Encoding': 'chunked',
            req = Request.blank('/a/c/o', environ={'REQUEST_METHOD': 'COPY'},
                headers={'Transfer-Encoding': 'chunked',
                'Content-Type': 'foo/bar'})
            req.body_file = ChunkedFile(11)
            self.app.memcache.store = {}
@@ -2111,6 +2111,20 @@ class TestObjectController(unittest.TestCase):
        exp = 'HTTP/1.1 412'
        self.assertEquals(headers[:len(exp)], exp)

    def test_chunked_put_bad_utf8_null(self):
        # Check invalid utf-8
        (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis) = \
                 _test_sockets
        sock = connect_tcp(('localhost', prolis.getsockname()[1]))
        fd = sock.makefile()
        fd.write('GET /v1/a%00 HTTP/1.1\r\nHost: localhost\r\n'
            'Connection: close\r\nX-Auth-Token: t\r\n'
            'Content-Length: 0\r\n\r\n')
        fd.flush()
        headers = readuntil2crlfs(fd)
        exp = 'HTTP/1.1 412'
        self.assertEquals(headers[:len(exp)], exp)

    def test_chunked_put_bad_path_no_controller(self):
        # Check bad path, no controller
        (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis) = \

@@ -1,94 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: Tests

import unittest
from swift.stats import access_processor


class TestAccessProcessor(unittest.TestCase):

    def test_log_line_parser_query_args(self):
        p = access_processor.AccessLogProcessor({})
        log_line = [str(x) for x in range(18)]
        log_line[1] = 'proxy-server'
        log_line[4] = '1/Jan/3/4/5/6'
        query = 'foo'
        for param in access_processor.LISTING_PARAMS:
            query += '&%s=blah' % param
        log_line[6] = '/v1/a/c/o?%s' % query
        log_line = 'x'*16 + ' '.join(log_line)
        res = p.log_line_parser(log_line)
        expected = {'code': 8, 'processing_time': '17', 'auth_token': '11',
                    'month': '01', 'second': '6', 'year': '3', 'tz': '+0000',
                    'http_version': '7', 'object_name': 'o', 'etag': '14',
                    'method': '5', 'trans_id': '15', 'client_ip': '2',
                    'bytes_out': 13, 'container_name': 'c', 'day': '1',
                    'minute': '5', 'account': 'a', 'hour': '4',
                    'referrer': '9', 'request': '/v1/a/c/o',
                    'user_agent': '10', 'bytes_in': 12, 'lb_ip': '3'}
        for param in access_processor.LISTING_PARAMS:
            expected[param] = 1
        expected['query'] = query
        self.assertEquals(res, expected)

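
# For reference, the flattened log_line above looks like (the sixteen
# leading 'x' characters stand in for the syslog prefix the parser
# skips):
#
#     'xxxxxxxxxxxxxxxx0 proxy-server 2 3 1/Jan/3/4/5/6 5 /v1/a/c/o?foo&... 7 8 ...'
#
# Field 4 splits on '/' into day/month/year/hour/minute/second, which is
# why the expected dict maps 'day' to '1', 'month' to '01' (Jan) and
# 'year' to '3'.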
    def test_log_line_parser_field_count(self):
        p = access_processor.AccessLogProcessor({})
        # too few fields
        log_line = [str(x) for x in range(17)]
        log_line[1] = 'proxy-server'
        log_line[4] = '1/Jan/3/4/5/6'
        log_line[6] = '/v1/a/c/o'
        log_line = 'x'*16 + ' '.join(log_line)
        res = p.log_line_parser(log_line)
        expected = {}
        self.assertEquals(res, expected)
        # right amount of fields
        log_line = [str(x) for x in range(18)]
        log_line[1] = 'proxy-server'
        log_line[4] = '1/Jan/3/4/5/6'
        log_line[6] = '/v1/a/c/o'
        log_line = 'x'*16 + ' '.join(log_line)
        res = p.log_line_parser(log_line)
        expected = {'code': 8, 'processing_time': '17', 'auth_token': '11',
                    'month': '01', 'second': '6', 'year': '3', 'tz': '+0000',
                    'http_version': '7', 'object_name': 'o', 'etag': '14',
                    'method': '5', 'trans_id': '15', 'client_ip': '2',
                    'bytes_out': 13, 'container_name': 'c', 'day': '1',
                    'minute': '5', 'account': 'a', 'hour': '4',
                    'referrer': '9', 'request': '/v1/a/c/o',
                    'user_agent': '10', 'bytes_in': 12, 'lb_ip': '3'}
        self.assertEquals(res, expected)
        # too many fields
        log_line = [str(x) for x in range(19)]
        log_line[1] = 'proxy-server'
        log_line[4] = '1/Jan/3/4/5/6'
        log_line[6] = '/v1/a/c/o'
        log_line = 'x'*16 + ' '.join(log_line)
        res = p.log_line_parser(log_line)
        expected = {'code': 8, 'processing_time': '17', 'auth_token': '11',
                    'month': '01', 'second': '6', 'year': '3', 'tz': '+0000',
                    'http_version': '7', 'object_name': 'o', 'etag': '14',
                    'method': '5', 'trans_id': '15', 'client_ip': '2',
                    'bytes_out': 13, 'container_name': 'c', 'day': '1',
                    'minute': '5', 'account': 'a', 'hour': '4',
                    'referrer': '9', 'request': '/v1/a/c/o',
                    'user_agent': '10', 'bytes_in': 12, 'lb_ip': '3'}
        self.assertEquals(res, expected)


if __name__ == '__main__':
    unittest.main()
@@ -1,238 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import unittest
import os
import time
import uuid
from shutil import rmtree
from swift.stats import db_stats_collector
from tempfile import mkdtemp
from test.unit import FakeLogger
from swift.common.db import AccountBroker, ContainerBroker
from swift.common.utils import mkdirs


class TestDbStats(unittest.TestCase):

    def setUp(self):
        self._was_logger = db_stats_collector.get_logger
        db_stats_collector.get_logger = FakeLogger
        self.testdir = os.path.join(mkdtemp(), 'tmp_test_db_stats')
        self.devices = os.path.join(self.testdir, 'node')
        rmtree(self.testdir, ignore_errors=1)
        mkdirs(os.path.join(self.devices, 'sda'))
        self.accounts = os.path.join(self.devices, 'sda', 'accounts')
        self.containers = os.path.join(self.devices, 'sda', 'containers')
        self.log_dir = '%s/log' % self.testdir

        self.conf = dict(devices=self.devices,
                         log_dir=self.log_dir,
                         mount_check='false')

    def tearDown(self):
        db_stats_collector.get_logger = self._was_logger
        rmtree(self.testdir)

    def test_account_stat_get_data(self):
        stat = db_stats_collector.AccountStatsCollector(self.conf)
        account_db = AccountBroker("%s/acc.db" % self.accounts,
                                        account='test_acc')
        account_db.initialize()
        account_db.put_container('test_container', time.time(),
                                      None, 10, 1000)
        info = stat.get_data("%s/acc.db" % self.accounts)
        self.assertEquals('''"test_acc",1,10,1000\n''', info)

    def test_container_stat_get_data(self):
        stat = db_stats_collector.ContainerStatsCollector(self.conf)
        container_db = ContainerBroker("%s/con.db" % self.containers,
                                     account='test_acc', container='test_con')
        container_db.initialize()
        container_db.put_object('test_obj', time.time(), 10, 'text', 'faketag')
        info = stat.get_data("%s/con.db" % self.containers)
        self.assertEquals('''"test_acc","test_con",1,10\n''', info)

    def test_container_stat_get_metadata(self):
        stat = db_stats_collector.ContainerStatsCollector(self.conf)
        container_db = ContainerBroker("%s/con.db" % self.containers,
                                     account='test_acc', container='test_con')
        container_db.initialize()
        container_db.put_object('test_obj', time.time(), 10, 'text', 'faketag')
        info = stat.get_data("%s/con.db" % self.containers)
        self.assertEquals('''"test_acc","test_con",1,10\n''', info)
        container_db.update_metadata({'test1': ('val', 1000)})

    def _gen_account_stat(self):
        stat = db_stats_collector.AccountStatsCollector(self.conf)
        output_data = set()
        for i in range(10):
            account_db = AccountBroker("%s/stats-201001010%s-%s.db" %
                                       (self.accounts, i, uuid.uuid4().hex),
                                        account='test_acc_%s' % i)
            account_db.initialize()
            account_db.put_container('test_container', time.time(),
                                      None, 10, 1000)
            # this will "commit" the data
            account_db.get_info()
            output_data.add('''"test_acc_%s",1,10,1000''' % i)

        self.assertEqual(len(output_data), 10)
        return stat, output_data

    def _drop_metadata_col(self, broker, acc_name):
        broker.conn.execute('''drop table container_stat''')
        broker.conn.executescript("""
            CREATE TABLE container_stat (
                account TEXT DEFAULT '%s',
                container TEXT DEFAULT 'test_con',
                created_at TEXT,
                put_timestamp TEXT DEFAULT '0',
                delete_timestamp TEXT DEFAULT '0',
                object_count INTEGER,
                bytes_used INTEGER,
                reported_put_timestamp TEXT DEFAULT '0',
                reported_delete_timestamp TEXT DEFAULT '0',
                reported_object_count INTEGER DEFAULT 0,
                reported_bytes_used INTEGER DEFAULT 0,
                hash TEXT default '00000000000000000000000000000000',
                id TEXT,
                status TEXT DEFAULT '',
                status_changed_at TEXT DEFAULT '0'
            );

            INSERT INTO container_stat (object_count, bytes_used)
                VALUES (1, 10);
        """ % acc_name)

    def _gen_container_stat(self, set_metadata=False, drop_metadata=False):
        if set_metadata:
            self.conf['metadata_keys'] = 'test1,test2'
            # webob runs title on all headers
        stat = db_stats_collector.ContainerStatsCollector(self.conf)
        output_data = set()
        for i in range(10):
            cont_db = ContainerBroker(
                "%s/container-stats-201001010%s-%s.db" % (self.containers, i,
                                                          uuid.uuid4().hex),
                 account='test_acc_%s' % i, container='test_con')
            cont_db.initialize()
            cont_db.put_object('test_obj', time.time(), 10, 'text', 'faketag')
            metadata_output = ''
            if set_metadata:
                if i % 2:
                    cont_db.update_metadata({'X-Container-Meta-Test1': (5, 1)})
                    metadata_output = ',1,'
                else:
                    cont_db.update_metadata({'X-Container-Meta-Test2': (7, 2)})
                    metadata_output = ',,1'
            # this will "commit" the data
            cont_db.get_info()
            if drop_metadata:
                output_data.add('''"test_acc_%s","test_con",1,10,,''' % i)
            else:
                output_data.add('''"test_acc_%s","test_con",1,10%s''' %
                                (i, metadata_output))
            if drop_metadata:
                self._drop_metadata_col(cont_db, 'test_acc_%s' % i)

        self.assertEqual(len(output_data), 10)
        return stat, output_data

    def test_account_stat_run_once_account(self):
        stat, output_data = self._gen_account_stat()
        stat.run_once()
        stat_file = os.listdir(self.log_dir)[0]
        with open(os.path.join(self.log_dir, stat_file)) as stat_handle:
            for i in range(10):
                data = stat_handle.readline()
                output_data.discard(data.strip())

        self.assertEqual(len(output_data), 0)

    def test_account_stat_run_once_container_metadata(self):

        stat, output_data = self._gen_container_stat(set_metadata=True)
        stat.run_once()
        stat_file = os.listdir(self.log_dir)[0]
        with open(os.path.join(self.log_dir, stat_file)) as stat_handle:
            headers = stat_handle.readline()
            self.assert_(headers.startswith('Account Hash,Container Name,'))
            for i in range(10):
                data = stat_handle.readline()
                output_data.discard(data.strip())

        self.assertEqual(len(output_data), 0)

    def test_account_stat_run_once_container_no_metadata(self):

        stat, output_data = self._gen_container_stat(set_metadata=True,
                                                     drop_metadata=True)
        stat.run_once()
        stat_file = os.listdir(self.log_dir)[0]
        with open(os.path.join(self.log_dir, stat_file)) as stat_handle:
            headers = stat_handle.readline()
            self.assert_(headers.startswith('Account Hash,Container Name,'))
            for i in range(10):
                data = stat_handle.readline()
                output_data.discard(data.strip())

        self.assertEqual(len(output_data), 0)

    def test_account_stat_run_once_both(self):
        acc_stat, acc_output_data = self._gen_account_stat()
        con_stat, con_output_data = self._gen_container_stat()

        acc_stat.run_once()
        stat_file = os.listdir(self.log_dir)[0]
        with open(os.path.join(self.log_dir, stat_file)) as stat_handle:
            for i in range(10):
                data = stat_handle.readline()
                acc_output_data.discard(data.strip())

        self.assertEqual(len(acc_output_data), 0)

        con_stat.run_once()
        stat_file = [f for f in os.listdir(self.log_dir) if f != stat_file][0]
        with open(os.path.join(self.log_dir, stat_file)) as stat_handle:
            headers = stat_handle.readline()
            self.assert_(headers.startswith('Account Hash,Container Name,'))
            for i in range(10):
                data = stat_handle.readline()
                con_output_data.discard(data.strip())

        self.assertEqual(len(con_output_data), 0)

    def test_account_stat_run_once_fail(self):
        stat, output_data = self._gen_account_stat()
        rmtree(self.accounts)
        stat.run_once()
        self.assertEquals(len(stat.logger.log_dict['debug']), 1)

    def test_not_implemented(self):
        db_stat = db_stats_collector.DatabaseStatsCollector(self.conf,
                                     'account', 'test_dir', 'stats-%Y%m%d%H_')
        self.assertRaises(NotImplementedError, db_stat.get_data)
        self.assertRaises(NotImplementedError, db_stat.get_header)

    def test_not_not_mounted(self):
        self.conf['mount_check'] = 'true'
        stat, output_data = self._gen_account_stat()
        stat.run_once()
        self.assertEquals(len(stat.logger.log_dict['error']), 1)

if __name__ == '__main__':
    unittest.main()
@@ -1,834 +0,0 @@
 | 
			
		||||
# Copyright (c) 2010-2011 OpenStack, LLC.
 | 
			
		||||
#
 | 
			
		||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
# you may not use this file except in compliance with the License.
 | 
			
		||||
# You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#    http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 | 
			
		||||
# implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
 | 
			
		||||
import unittest
 | 
			
		||||
from test.unit import tmpfile
 | 
			
		||||
import Queue
 | 
			
		||||
import datetime
 | 
			
		||||
import hashlib
 | 
			
		||||
import pickle
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from swift.common import internal_proxy
 | 
			
		||||
from swift.stats import log_processor
 | 
			
		||||
from swift.common.exceptions import ChunkReadTimeout
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FakeUploadApp(object):
 | 
			
		||||
    def __init__(self, *args, **kwargs):
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
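# Logger stand-in whose every method silently swallows its arguments.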
class DumbLogger(object):
    def __getattr__(self, n):
        return self.foo

    def foo(self, *a, **kw):
        pass

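# Internal-proxy stand-in serving a canned container listing and object body;
# the flags simulate HTTP error codes, chunk-read timeouts, and corrupt gzip
# payloads.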
class DumbInternalProxy(object):
    def __init__(self, code=200, timeout=False, bad_compressed=False):
        self.code = code
        self.timeout = timeout
        self.bad_compressed = bad_compressed

    def get_container_list(self, account, container, marker=None,
                           end_marker=None):
        n = '2010/03/14/13/obj1'
        if marker is None or n > marker:
            if end_marker:
                if n <= end_marker:
                    return [{'name': n}]
                else:
                    return []
            return [{'name': n}]
        return []

    def get_object(self, account, container, object_name):
        if object_name.endswith('.gz'):
            if self.bad_compressed:
                # invalid compressed data
                def data():
                    yield '\xff\xff\xff\xff\xff\xff\xff'
            else:
                # 'obj\ndata', compressed with gzip -9
                def data():
                    yield '\x1f\x8b\x08'
                    yield '\x08"\xd79L'
                    yield '\x02\x03te'
                    yield 'st\x00\xcbO'
                    yield '\xca\xe2JI,I'
                    yield '\xe4\x02\x00O\xff'
                    yield '\xa3Y\t\x00\x00\x00'
        else:
            def data():
                yield 'obj\n'
                if self.timeout:
                    raise ChunkReadTimeout
                yield 'data'
        return self.code, data()

class TestLogProcessor(unittest.TestCase):

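    # a sample proxy access-log line and a sample account stats line, reused
    # as input throughout the tests below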
    access_test_line = 'Jul  9 04:14:30 saio proxy-server 1.2.3.4 4.5.6.7 '\
                    '09/Jul/2010/04/14/30 GET '\
                    '/v1/acct/foo/bar?format=json&foo HTTP/1.0 200 - '\
                    'curl tk4e350daf-9338-4cc6-aabb-090e49babfbd '\
                    '6 95 - txfa431231-7f07-42fd-8fc7-7da9d8cc1f90 - 0.0262'
    stats_test_line = 'account,1,2,3'
    proxy_config = {'log-processor': {

                    }
                   }

    def test_lazy_load_internal_proxy(self):
        # stub out internal_proxy's upload_app
        internal_proxy.BaseApplication = FakeUploadApp
        dummy_proxy_config = """[app:proxy-server]
use = egg:swift#proxy
"""
        with tmpfile(dummy_proxy_config) as proxy_config_file:
            conf = {'log-processor': {
                    'proxy_server_conf': proxy_config_file,
                }
            }
            p = log_processor.LogProcessor(conf, DumbLogger())
            self.assert_(isinstance(p._internal_proxy,
                                    None.__class__))
            self.assert_(isinstance(p.internal_proxy,
                                    log_processor.InternalProxy))
            self.assertEquals(p.internal_proxy, p._internal_proxy)

        # reset FakeUploadApp
        reload(internal_proxy)

    def test_access_log_line_parser(self):
        access_proxy_config = self.proxy_config.copy()
        access_proxy_config.update({
                        'log-processor-access': {
                            'source_filename_format':'%Y%m%d%H*',
                            'class_path':
                                'swift.stats.access_processor.AccessLogProcessor'
                        }})
        p = log_processor.LogProcessor(access_proxy_config, DumbLogger())
        result = p.plugins['access']['instance'].log_line_parser(self.access_test_line)
        self.assertEquals(result, {'code': 200,
           'processing_time': '0.0262',
           'auth_token': 'tk4e350daf-9338-4cc6-aabb-090e49babfbd',
           'month': '07',
           'second': '30',
           'year': '2010',
           'query': 'format=json&foo',
           'tz': '+0000',
           'http_version': 'HTTP/1.0',
           'object_name': 'bar',
           'etag': '-',
           'method': 'GET',
           'trans_id': 'txfa431231-7f07-42fd-8fc7-7da9d8cc1f90',
           'client_ip': '1.2.3.4',
           'format': 1,
           'bytes_out': 95,
           'container_name': 'foo',
           'day': '09',
           'minute': '14',
           'account': 'acct',
           'hour': '04',
           'referrer': '-',
           'request': '/v1/acct/foo/bar',
           'user_agent': 'curl',
           'bytes_in': 6,
           'lb_ip': '4.5.6.7'})

    def test_process_one_access_file(self):
        access_proxy_config = self.proxy_config.copy()
        access_proxy_config.update({
                        'log-processor-access': {
                            'source_filename_format':'%Y%m%d%H*',
                            'class_path':
                                'swift.stats.access_processor.AccessLogProcessor'
                        }})
        p = log_processor.LogProcessor(access_proxy_config, DumbLogger())
        def get_object_data(*a, **kw):
            return [self.access_test_line]
        p.get_object_data = get_object_data
        result = p.process_one_file('access', 'a', 'c', 'o')
        expected = {('acct', '2010', '07', '09', '04'):
                    {('public', 'object', 'GET', '2xx'): 1,
                    ('public', 'bytes_out'): 95,
                    'marker_query': 0,
                    'format_query': 1,
                    'delimiter_query': 0,
                    'path_query': 0,
                    ('public', 'bytes_in'): 6,
                    'prefix_query': 0}}
        self.assertEquals(result, expected)

    def test_process_one_access_file_error(self):
        access_proxy_config = self.proxy_config.copy()
        access_proxy_config.update({
                        'log-processor-access': {
                            'source_filename_format':'%Y%m%d%H*',
                            'class_path':
                                'swift.stats.access_processor.AccessLogProcessor'
                        }})
        p = log_processor.LogProcessor(access_proxy_config, DumbLogger())
        p._internal_proxy = DumbInternalProxy(code=500)
        self.assertRaises(log_processor.BadFileDownload, p.process_one_file,
                          'access', 'a', 'c', 'o')

    def test_get_container_listing(self):
        p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
        p._internal_proxy = DumbInternalProxy()
        result = p.get_container_listing('a', 'foo')
        expected = ['2010/03/14/13/obj1']
        self.assertEquals(result, expected)
        result = p.get_container_listing('a', 'foo', listing_filter=expected)
        expected = []
        self.assertEquals(result, expected)
        result = p.get_container_listing('a', 'foo', start_date='2010031412',
                                            end_date='2010031414')
        expected = ['2010/03/14/13/obj1']
        self.assertEquals(result, expected)
        result = p.get_container_listing('a', 'foo', start_date='2010031414')
        expected = []
        self.assertEquals(result, expected)
        result = p.get_container_listing('a', 'foo', start_date='2010031410',
                                            end_date='2010031412')
        expected = []
        self.assertEquals(result, expected)
        result = p.get_container_listing('a', 'foo', start_date='2010031412',
                                            end_date='2010031413')
        expected = ['2010/03/14/13/obj1']
        self.assertEquals(result, expected)

    def test_get_object_data(self):
        p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
        p._internal_proxy = DumbInternalProxy()
        result = list(p.get_object_data('a', 'c', 'o', False))
        expected = ['obj','data']
        self.assertEquals(result, expected)
        result = list(p.get_object_data('a', 'c', 'o.gz', True))
        self.assertEquals(result, expected)

    def test_get_object_data_errors(self):
        p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
        p._internal_proxy = DumbInternalProxy(code=500)
        result = p.get_object_data('a', 'c', 'o')
        self.assertRaises(log_processor.BadFileDownload, list, result)
        p._internal_proxy = DumbInternalProxy(bad_compressed=True)
        result = p.get_object_data('a', 'c', 'o.gz', True)
        self.assertRaises(log_processor.BadFileDownload, list, result)
        p._internal_proxy = DumbInternalProxy(timeout=True)
        result = p.get_object_data('a', 'c', 'o')
        self.assertRaises(log_processor.BadFileDownload, list, result)

    def test_get_stat_totals(self):
        stats_proxy_config = self.proxy_config.copy()
        stats_proxy_config.update({
                        'log-processor-stats': {
                            'class_path':
                                'swift.stats.stats_processor.StatsLogProcessor'
                        }})
        p = log_processor.LogProcessor(stats_proxy_config, DumbLogger())
        p._internal_proxy = DumbInternalProxy()
        def get_object_data(*a,**kw):
            return [self.stats_test_line]
        p.get_object_data = get_object_data
        result = p.process_one_file('stats', 'a', 'c', 'y/m/d/h/o')
        expected = {('account', 'y', 'm', 'd', 'h'):
                    {'replica_count': 1,
                    'object_count': 2,
                    'container_count': 1,
                    'bytes_used': 3}}
        self.assertEquals(result, expected)

    def test_generate_keylist_mapping(self):
        p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
        result = p.generate_keylist_mapping()
        expected = {}
        self.assertEquals(result, expected)

    def test_generate_keylist_mapping_with_dummy_plugins(self):
        class Plugin1(object):
            def keylist_mapping(self):
                return {'a': 'b', 'c': 'd', 'e': ['f', 'g']}
        class Plugin2(object):
            def keylist_mapping(self):
                return {'a': '1', 'e': '2', 'h': '3'}
        p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
        p.plugins['plugin1'] = {'instance': Plugin1()}
        p.plugins['plugin2'] = {'instance': Plugin2()}
        result = p.generate_keylist_mapping()
        expected = {'a': set(['b', '1']), 'c': 'd', 'e': set(['2', 'f', 'g']),
                    'h': '3'}
        self.assertEquals(result, expected)

    def test_access_keylist_mapping_format(self):
        proxy_config = self.proxy_config.copy()
        proxy_config.update({
                        'log-processor-access': {
                            'source_filename_format':'%Y%m%d%H*',
                            'class_path':
                                'swift.stats.access_processor.AccessLogProcessor'
                        }})
        p = log_processor.LogProcessor(proxy_config, DumbLogger())
        mapping = p.generate_keylist_mapping()
        for k, v in mapping.items():
            # these only work for Py2.7+
            #self.assertIsInstance(k, str)
            self.assertTrue(isinstance(k, str), type(k))

    def test_stats_keylist_mapping_format(self):
        proxy_config = self.proxy_config.copy()
        proxy_config.update({
                        'log-processor-stats': {
                            'class_path':
                                'swift.stats.stats_processor.StatsLogProcessor'
                        }})
        p = log_processor.LogProcessor(proxy_config, DumbLogger())
        mapping = p.generate_keylist_mapping()
        for k, v in mapping.items():
            # these only work for Py2.7+
            #self.assertIsInstance(k, str)
            self.assertTrue(isinstance(k, str), type(k))

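    # collate_worker consumes work items from q_in until it sees the None
    # sentinel, posting (item, result) pairs on q_out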
    def test_collate_worker(self):
        try:
            log_processor.LogProcessor._internal_proxy = DumbInternalProxy()
            def get_object_data(*a,**kw):
                return [self.access_test_line]
            orig_get_object_data = log_processor.LogProcessor.get_object_data
            log_processor.LogProcessor.get_object_data = get_object_data
            proxy_config = self.proxy_config.copy()
            proxy_config.update({
                    'log-processor-access': {
                        'source_filename_format':'%Y%m%d%H*',
                        'class_path':
                            'swift.stats.access_processor.AccessLogProcessor'
                    }})
            processor_args = (proxy_config, DumbLogger())
            q_in = Queue.Queue()
            q_out = Queue.Queue()
            work_request = ('access', 'a','c','o')
            q_in.put(work_request)
            q_in.put(None)
            log_processor.collate_worker(processor_args, q_in, q_out)
            item, ret = q_out.get()
            self.assertEquals(item, work_request)
            expected = {('acct', '2010', '07', '09', '04'):
                        {('public', 'object', 'GET', '2xx'): 1,
                        ('public', 'bytes_out'): 95,
                        'marker_query': 0,
                        'format_query': 1,
                        'delimiter_query': 0,
                        'path_query': 0,
                        ('public', 'bytes_in'): 6,
                        'prefix_query': 0}}
            self.assertEquals(ret, expected)
        finally:
            log_processor.LogProcessor._internal_proxy = None
            log_processor.LogProcessor.get_object_data = orig_get_object_data

    def test_collate_worker_error(self):
        def get_object_data(*a,**kw):
            raise Exception()
        orig_get_object_data = log_processor.LogProcessor.get_object_data
        try:
            log_processor.LogProcessor.get_object_data = get_object_data
            proxy_config = self.proxy_config.copy()
            proxy_config.update({
                    'log-processor-access': {
                        'source_filename_format':'%Y%m%d%H*',
                        'class_path':
                            'swift.stats.access_processor.AccessLogProcessor'
                    }})
            processor_args = (proxy_config, DumbLogger())
            q_in = Queue.Queue()
            q_out = Queue.Queue()
            work_request = ('access', 'a','c','o')
            q_in.put(work_request)
            q_in.put(None)
            log_processor.collate_worker(processor_args, q_in, q_out)
            item, ret = q_out.get()
            self.assertEquals(item, work_request)
            # these only work for Py2.7+
            #self.assertIsInstance(ret, log_processor.BadFileDownload)
            self.assertTrue(isinstance(ret, Exception))
        finally:
            log_processor.LogProcessor.get_object_data = orig_get_object_data

    def test_multiprocess_collate(self):
        try:
            log_processor.LogProcessor._internal_proxy = DumbInternalProxy()
            def get_object_data(*a,**kw):
                return [self.access_test_line]
            orig_get_object_data = log_processor.LogProcessor.get_object_data
            log_processor.LogProcessor.get_object_data = get_object_data
            proxy_config = self.proxy_config.copy()
            proxy_config.update({
                    'log-processor-access': {
                        'source_filename_format':'%Y%m%d%H*',
                        'class_path':
                            'swift.stats.access_processor.AccessLogProcessor'
                    }})
            processor_args = (proxy_config, DumbLogger())
            item = ('access', 'a','c','o')
            logs_to_process = [item]
            results = log_processor.multiprocess_collate(processor_args,
                                                         logs_to_process,
                                                         1)
            results = list(results)
            expected = [(item, {('acct', '2010', '07', '09', '04'):
                        {('public', 'object', 'GET', '2xx'): 1,
                        ('public', 'bytes_out'): 95,
                        'marker_query': 0,
                        'format_query': 1,
                        'delimiter_query': 0,
                        'path_query': 0,
                        ('public', 'bytes_in'): 6,
                        'prefix_query': 0}})]
            self.assertEquals(results, expected)
        finally:
            log_processor.LogProcessor._internal_proxy = None
            log_processor.LogProcessor.get_object_data = orig_get_object_data

    def test_multiprocess_collate_errors(self):
        def get_object_data(*a,**kw):
            raise log_processor.BadFileDownload()
        orig_get_object_data = log_processor.LogProcessor.get_object_data
        try:
            log_processor.LogProcessor.get_object_data = get_object_data
            proxy_config = self.proxy_config.copy()
            proxy_config.update({
                    'log-processor-access': {
                        'source_filename_format':'%Y%m%d%H*',
                        'class_path':
                            'swift.stats.access_processor.AccessLogProcessor'
                    }})
            processor_args = (proxy_config, DumbLogger())
            item = ('access', 'a','c','o')
            logs_to_process = [item]
            results = log_processor.multiprocess_collate(processor_args,
                                                         logs_to_process,
                                                         1)
            results = list(results)
            expected = []
            self.assertEquals(results, expected)
        finally:
            log_processor.LogProcessor._internal_proxy = None
            log_processor.LogProcessor.get_object_data = orig_get_object_data

class TestLogProcessorDaemon(unittest.TestCase):

    def test_get_lookback_interval(self):
        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self, lookback_hours, lookback_window):
                self.lookback_hours = lookback_hours
                self.lookback_window = lookback_window

        try:
            d = datetime.datetime

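            # each case is [now, lookback_hours, lookback_window,
            #               expected_lookback_start, expected_lookback_end]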
            for x in [
                    [d(2011, 1, 1), 0, 0, None, None],
                    [d(2011, 1, 1), 120, 0, '2010122700', None],
                    [d(2011, 1, 1), 120, 24, '2010122700', '2010122800'],
                    [d(2010, 1, 2, 3, 4), 120, 48, '2009122803', '2009123003'],
                    [d(2009, 5, 6, 7, 8), 1200, 100, '2009031707', '2009032111'],
                    [d(2008, 9, 10, 11, 12), 3000, 1000, '2008050811', '2008061903'],
                ]:

                log_processor.now = lambda: x[0]

                d = MockLogProcessorDaemon(x[1], x[2])
                self.assertEquals((x[3], x[4]), d.get_lookback_interval())
        finally:
            log_processor.now = datetime.datetime.now

    def test_get_processed_files_list(self):
        class MockLogProcessor():
            def __init__(self, stream):
                self.stream = stream

            def get_object_data(self, *args, **kwargs):
                return self.stream

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self, stream):
                self.log_processor = MockLogProcessor(stream)
                self.log_processor_account = 'account'
                self.log_processor_container = 'container'
                self.processed_files_filename = 'filename'

        file_list = set(['a', 'b', 'c'])

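        # an empty download should yield None; otherwise the pickled set
        # should round-trip through get_processed_files_list()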
        for s, l in [['', None],
                [pickle.dumps(set()).split('\n'), set()],
                [pickle.dumps(file_list).split('\n'), file_list],
            ]:

            self.assertEquals(l,
                MockLogProcessorDaemon(s).get_processed_files_list())

    def test_get_processed_files_list_bad_file_downloads(self):
        class MockLogProcessor():
            def __init__(self, status_code):
                self.err = log_processor.BadFileDownload(status_code)

            def get_object_data(self, *a, **k):
                raise self.err

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self, status_code):
                self.log_processor = MockLogProcessor(status_code)
                self.log_processor_account = 'account'
                self.log_processor_container = 'container'
                self.processed_files_filename = 'filename'

        for c, l in [[404, set()], [503, None], [None, None]]:
            self.assertEquals(l,
                MockLogProcessorDaemon(c).get_processed_files_list())

    def test_get_aggregate_data(self):
        # When run "for real", the various keys/values in the input and
        # output dictionaries are often not simple strings. For testing we
        # can use keys that are easier to work with.

        processed_files = set()

        data_in = [
            ['file1', {
                'acct1_time1': {'field1': 1, 'field2': 2, 'field3': 3},
                'acct1_time2': {'field1': 4, 'field2': 5},
                'acct2_time1': {'field1': 6, 'field2': 7},
                'acct3_time3': {'field1': 8, 'field2': 9},
                }
            ],
            ['file2', {'acct1_time1': {'field1': 10}}],
        ]

        expected_data_out = {
            'acct1_time1': {'field1': 11, 'field2': 2, 'field3': 3},
            'acct1_time2': {'field1': 4, 'field2': 5},
            'acct2_time1': {'field1': 6, 'field2': 7},
            'acct3_time3': {'field1': 8, 'field2': 9},
        }

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self):
                pass

        d = MockLogProcessorDaemon()
        data_out = d.get_aggregate_data(processed_files, data_in)

        for k, v in expected_data_out.items():
            self.assertEquals(v, data_out[k])

        self.assertEquals(set(['file1', 'file2']), processed_files)

    def test_get_final_info(self):
        # When run "for real", the various keys/values in the input and
        # output dictionaries are often not simple strings. For testing we
        # can use keys/values that are easier to work with.

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self):
                self._keylist_mapping = {
                    'out_field1':['field1', 'field2', 'field3'],
                    'out_field2':['field2', 'field3'],
                    'out_field3':['field3'],
                    'out_field4':'field4',
                    'out_field5':['field6', 'field7', 'field8'],
                    'out_field6':['field6'],
                    'out_field7':'field7',
                }

        data_in = {
            'acct1_time1': {'field1': 11, 'field2': 2, 'field3': 3,
                'field4': 8, 'field5': 11},
            'acct1_time2': {'field1': 4, 'field2': 5},
            'acct2_time1': {'field1': 6, 'field2': 7},
            'acct3_time3': {'field1': 8, 'field2': 9},
        }

        expected_data_out = {
            'acct1_time1': {'out_field1': 16, 'out_field2': 5,
                'out_field3': 3, 'out_field4': 8, 'out_field5': 0,
                'out_field6': 0, 'out_field7': 0,},
            'acct1_time2': {'out_field1': 9, 'out_field2': 5,
                'out_field3': 0, 'out_field4': 0, 'out_field5': 0,
                'out_field6': 0, 'out_field7': 0,},
            'acct2_time1': {'out_field1': 13, 'out_field2': 7,
                'out_field3': 0, 'out_field4': 0, 'out_field5': 0,
                'out_field6': 0, 'out_field7': 0,},
            'acct3_time3': {'out_field1': 17, 'out_field2': 9,
                'out_field3': 0, 'out_field4': 0, 'out_field5': 0,
                'out_field6': 0, 'out_field7': 0,},
        }

        self.assertEquals(expected_data_out,
            MockLogProcessorDaemon().get_final_info(data_in))

    def test_store_processed_files_list(self):
        class MockInternalProxy:
            def __init__(self, test, daemon, processed_files):
                self.test = test
                self.daemon = daemon
                self.processed_files = processed_files

            def upload_file(self, f, account, container, filename):
                self.test.assertEquals(self.processed_files,
                    pickle.loads(f.getvalue()))
                self.test.assertEquals(self.daemon.log_processor_account,
                    account)
                self.test.assertEquals(self.daemon.log_processor_container,
                    container)
                self.test.assertEquals(self.daemon.processed_files_filename,
                    filename)

        class MockLogProcessor:
            def __init__(self, test, daemon, processed_files):
                self.internal_proxy = MockInternalProxy(test, daemon,
                    processed_files)

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self, test, processed_files):
                self.log_processor = \
                    MockLogProcessor(test, self, processed_files)
                self.log_processor_account = 'account'
                self.log_processor_container = 'container'
                self.processed_files_filename = 'filename'

        processed_files = set(['a', 'b', 'c'])
        MockLogProcessorDaemon(self, processed_files).\
            store_processed_files_list(processed_files)

    def test_get_output(self):
        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self):
                self._keylist_mapping = {'a':None, 'b':None, 'c':None}

        data_in = {
            ('acct1', 2010, 1, 1, 0): {'a':1, 'b':2, 'c':3},
            ('acct1', 2010, 10, 10, 10): {'a':10, 'b':20, 'c':30},
            ('acct2', 2008, 3, 6, 9): {'a':8, 'b':9, 'c':12},
            ('acct3', 2005, 4, 8, 16): {'a':1, 'b':5, 'c':25},
        }

        expected_data_out = [
            ['data_ts', 'account', 'a', 'b', 'c'],
            ['2010/01/01 00:00:00', 'acct1', '1', '2', '3'],
            ['2010/10/10 10:00:00', 'acct1', '10', '20', '30'],
            ['2008/03/06 09:00:00', 'acct2', '8', '9', '12'],
            ['2005/04/08 16:00:00', 'acct3', '1', '5', '25'],
        ]

        data_out = MockLogProcessorDaemon().get_output(data_in)
        self.assertEquals(expected_data_out[0], data_out[0])

        for row in data_out[1:]:
            self.assert_(row in expected_data_out)

        for row in expected_data_out[1:]:
            self.assert_(row in data_out)

    def test_store_output(self):
        try:
            real_strftime = time.strftime
            mock_strftime_return = '2010/03/02/01/'
            def mock_strftime(format):
                self.assertEquals('%Y/%m/%d/%H/', format)
                return mock_strftime_return
            log_processor.time.strftime = mock_strftime

            data_in = [
                ['data_ts', 'account', 'a', 'b', 'c'],
                ['2010/10/10 10:00:00', 'acct1', '1', '2', '3'],
                ['2010/10/10 10:00:00', 'acct1', '10', '20', '30'],
                ['2008/03/06 09:00:00', 'acct2', '8', '9', '12'],
                ['2005/04/08 16:00:00', 'acct3', '1', '5', '25'],
            ]

            expected_output = '\n'.join([','.join(row) for row in data_in])
            h = hashlib.md5(expected_output).hexdigest()
            expected_filename = '%s%s.csv.gz' % (mock_strftime_return, h)

            class MockInternalProxy:
                def __init__(self, test, daemon, expected_filename,
                    expected_output):
                    self.test = test
                    self.daemon = daemon
                    self.expected_filename = expected_filename
                    self.expected_output = expected_output

                def upload_file(self, f, account, container, filename):
                    self.test.assertEquals(self.daemon.log_processor_account,
                        account)
                    self.test.assertEquals(self.daemon.log_processor_container,
                        container)
                    self.test.assertEquals(self.expected_filename, filename)
                    self.test.assertEquals(self.expected_output, f.getvalue())

            class MockLogProcessor:
                def __init__(self, test, daemon, expected_filename,
                    expected_output):
                    self.internal_proxy = MockInternalProxy(test, daemon,
                        expected_filename, expected_output)

            class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
                def __init__(self, test, expected_filename, expected_output):
                    self.log_processor = MockLogProcessor(test, self,
                        expected_filename, expected_output)
                    self.log_processor_account = 'account'
                    self.log_processor_container = 'container'
                    self.processed_files_filename = 'filename'

            MockLogProcessorDaemon(self, expected_filename, expected_output).\
                store_output(data_in)
        finally:
            log_processor.time.strftime = real_strftime

    def test_keylist_mapping(self):
        # Kind of lame test to see if the property is both
        # generated by a particular method and cached properly.
        # The method that actually generates the mapping is
        # tested elsewhere.

        value_return = 'keylist_mapping'
        class MockLogProcessor:
            def __init__(self):
                self.call_count = 0

            def generate_keylist_mapping(self):
                self.call_count += 1
                return value_return

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self):
                self.log_processor = MockLogProcessor()
                self._keylist_mapping = None

        d = MockLogProcessorDaemon()
        self.assertEquals(value_return, d.keylist_mapping)
        self.assertEquals(value_return, d.keylist_mapping)
        self.assertEquals(1, d.log_processor.call_count)

    def test_process_logs(self):
        try:
            mock_logs_to_process = 'logs_to_process'
            mock_processed_files = 'processed_files'

            real_multiprocess_collate = log_processor.multiprocess_collate
            multiprocess_collate_return = 'multiprocess_collate_return'

            get_aggregate_data_return = 'get_aggregate_data_return'
            get_final_info_return = 'get_final_info_return'
            get_output_return = 'get_output_return'

            class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
                def __init__(self, test):
                    self.test = test
                    self.total_conf = 'total_conf'
                    self.logger = 'logger'
                    self.worker_count = 'worker_count'

                def get_aggregate_data(self, processed_files, results):
                    self.test.assertEquals(mock_processed_files, processed_files)
                    self.test.assertEquals(multiprocess_collate_return, results)
                    return get_aggregate_data_return

                def get_final_info(self, aggr_data):
                    self.test.assertEquals(get_aggregate_data_return, aggr_data)
                    return get_final_info_return

                def get_output(self, final_info):
                    self.test.assertEquals(get_final_info_return, final_info)
                    return get_output_return

            d = MockLogProcessorDaemon(self)

            def mock_multiprocess_collate(processor_args, logs_to_process,
                worker_count):
                self.assertEquals(d.total_conf, processor_args[0])
                self.assertEquals(d.logger, processor_args[1])

                self.assertEquals(mock_logs_to_process, logs_to_process)
                self.assertEquals(d.worker_count, worker_count)

                return multiprocess_collate_return

            log_processor.multiprocess_collate = mock_multiprocess_collate

            output = d.process_logs(mock_logs_to_process, mock_processed_files)
            self.assertEquals(get_output_return, output)
        finally:
            log_processor.multiprocess_collate = real_multiprocess_collate

    def test_run_once_get_processed_files_list_returns_none(self):
        class MockLogProcessor:
            def get_data_list(self, lookback_start, lookback_end,
                processed_files):
                raise unittest.TestCase.failureException, \
                    'Method should not be called'

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self):
                self.logger = DumbLogger()
                self.log_processor = MockLogProcessor()

            def get_lookback_interval(self):
                return None, None

            def get_processed_files_list(self):
                return None

        MockLogProcessorDaemon().run_once()

    def test_run_once_no_logs_to_process(self):
        class MockLogProcessor():
            def __init__(self, daemon, test):
                self.daemon = daemon
                self.test = test

            def get_data_list(self, lookback_start, lookback_end,
                processed_files):
                self.test.assertEquals(self.daemon.lookback_start,
                    lookback_start)
                self.test.assertEquals(self.daemon.lookback_end,
                    lookback_end)
                self.test.assertEquals(self.daemon.processed_files,
                    processed_files)
                return []

        class MockLogProcessorDaemon(log_processor.LogProcessorDaemon):
            def __init__(self, test):
                self.logger = DumbLogger()
                self.log_processor = MockLogProcessor(self, test)
                self.lookback_start = 'lookback_start'
                self.lookback_end = 'lookback_end'
                self.processed_files = ['a', 'b', 'c']

            def get_lookback_interval(self):
                return self.lookback_start, self.lookback_end

            def get_processed_files_list(self):
                return self.processed_files

            def process_logs(self, logs_to_process, processed_files):
                raise unittest.TestCase.failureException, \
                    'Method should not be called'

        MockLogProcessorDaemon(self).run_once()

@@ -1,240 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import os
from datetime import datetime
from tempfile import mkdtemp
from shutil import rmtree
from functools import partial
from collections import defaultdict
import random
import string

from test.unit import temptree
from swift.stats import log_uploader

import logging
logging.basicConfig(level=logging.DEBUG)
LOGGER = logging.getLogger()

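# A minimal gzip stream (original filename 'data'); judging by the trailing
# size field it should decompress to the four bytes 'data', e.g. (a sketch,
# assuming the standard library modules):
#   gzip.GzipFile(fileobj=StringIO.StringIO(COMPRESSED_DATA)).read()  # 'data'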
COMPRESSED_DATA = '\x1f\x8b\x08\x08\x87\xa5zM\x02\xffdata\x00KI,I\x04\x00c' \
        '\xf3\xf3\xad\x04\x00\x00\x00'

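# Filename-parsing regex written in verbose form (presumably compiled with
# re.VERBOSE): matches names that begin with a YYYYMMDDHH timestamp. Used
# below as a valid source_filename_pattern.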
access_regex = '''
    ^
    (?P<year>[0-9]{4})
    (?P<month>[0-1][0-9])
    (?P<day>[0-3][0-9])
    (?P<hour>[0-2][0-9])
    .*$
    '''


def mock_appconfig(*args, **kwargs):
    pass


class MockInternalProxy():

    def __init__(self, *args, **kwargs):
        pass

    def create_container(self, *args, **kwargs):
        return True

    def upload_file(self, *args, **kwargs):
        return True


_orig_LogUploader = log_uploader.LogUploader


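# LogUploader subclass that fills in the conf keys the real class requires
# and records each (filename, date-fields) pair it is asked to upload, while
# still exercising the real upload_one_log() code path.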
class MockLogUploader(_orig_LogUploader):

    def __init__(self, conf, logger=LOGGER):
        conf['swift_account'] = conf.get('swift_account', '')
        conf['container_name'] = conf.get('container_name', '')
        conf['new_log_cutoff'] = conf.get('new_log_cutoff', '0')
        conf['source_filename_format'] = conf.get(
            'source_filename_format', conf.get('filename_format'))
        log_uploader.LogUploader.__init__(self, conf, 'plugin')
        self.logger = logger
        self.uploaded_files = []

    def upload_one_log(self, filename, year, month, day, hour):
        d = {'year': year, 'month': month, 'day': day, 'hour': hour}
        self.uploaded_files.append((filename, d))
        _orig_LogUploader.upload_one_log(self, filename, year, month,
                                         day, hour)


class TestLogUploader(unittest.TestCase):

    def setUp(self):
        # mock internal proxy
        self._orig_InternalProxy = log_uploader.InternalProxy
        self._orig_appconfig = log_uploader.appconfig
        log_uploader.InternalProxy = MockInternalProxy
        log_uploader.appconfig = mock_appconfig

    def tearDown(self):
        log_uploader.appconfig = self._orig_appconfig
        log_uploader.InternalProxy = self._orig_InternalProxy

    def test_bad_pattern_in_config(self):
        files = [datetime.now().strftime('%Y%m%d%H')]
        with temptree(files, contents=[COMPRESSED_DATA] * len(files)) as t:
            # invalid pattern
            conf = {'log_dir': t,
                    'source_filename_pattern': '%Y%m%d%h'}  # should be %H
            uploader = MockLogUploader(conf)
            self.assertRaises(SystemExit, uploader.upload_all_logs)

            conf = {'log_dir': t, 'source_filename_pattern': access_regex}
            uploader = MockLogUploader(conf)
            uploader.upload_all_logs()
            self.assertEquals(len(uploader.uploaded_files), 1)

    def test_pattern_upload_all_logs(self):

        # test empty dir
        with temptree([]) as t:
            conf = {'log_dir': t}
            uploader = MockLogUploader(conf)
            self.assertRaises(SystemExit, uploader.run_once)

        def get_random_length_str(max_len=10, chars=string.ascii_letters):
            return ''.join(random.choice(chars) for x in
                           range(random.randint(1, max_len)))

        template = 'prefix_%(random)s_%(digits)s.blah.' \
 | 
			
		||||
                '%(datestr)s%(hour)0.2d00-%(next_hour)0.2d00-%(number)s.gz'
 | 
			
		||||
        pattern = '''prefix_.*_[0-9]+\.blah\.
 | 
			
		||||
                     (?P<year>[0-9]{4})
 | 
			
		||||
                     (?P<month>[0-1][0-9])
 | 
			
		||||
                     (?P<day>[0-3][0-9])
 | 
			
		||||
                     (?P<hour>[0-2][0-9])00-[0-9]{2}00
 | 
			
		||||
                     -[0-9]?[0-9]\.gz'''
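        # a pattern spread across lines like this presumably only works if
        # LogUploader compiles source_filename_pattern with re.VERBOSE,
        # which ignores whitespace inside the pattern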
        files_that_should_match = []
        # add some files that match
        for i in range(24):
            fname = template % {
                'random': get_random_length_str(),
                'digits': get_random_length_str(16, string.digits),
                'datestr': datetime.now().strftime('%Y%m%d'),
                'hour': i,
                'next_hour': i + 1,
                'number': random.randint(0, 20),
            }
            files_that_should_match.append(fname)

        # add some files that don't match
        files = list(files_that_should_match)
        for i in range(24):
            fname = template % {
                'random': get_random_length_str(),
                'digits': get_random_length_str(16, string.digits),
                'datestr': datetime.now().strftime('%Y%m'),
                'hour': i,
                'next_hour': i + 1,
                'number': random.randint(0, 20),
            }
            files.append(fname)

        for fname in files:
            print fname

        with temptree(files, contents=[COMPRESSED_DATA] * len(files)) as t:
            self.assertEquals(len(os.listdir(t)), 48)
            conf = {'source_filename_pattern': pattern, 'log_dir': t}
            uploader = MockLogUploader(conf)
            uploader.run_once()
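            # the 24 matching files should be uploaded and removed, leaving
            # only the 24 non-matching files on disk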
            self.assertEquals(len(os.listdir(t)), 24)
            self.assertEquals(len(uploader.uploaded_files), 24)
            files_that_were_uploaded = set(x[0] for x in
                                           uploader.uploaded_files)
            for f in files_that_should_match:
                self.assert_(os.path.join(t, f) in files_that_were_uploaded)

    def test_log_cutoff(self):
        files = [datetime.now().strftime('%Y%m%d%H')]
        with temptree(files) as t:
            conf = {'log_dir': t, 'new_log_cutoff': '7200',
                    'source_filename_pattern': access_regex}
            uploader = MockLogUploader(conf)
            uploader.run_once()
            self.assertEquals(len(uploader.uploaded_files), 0)
            conf = {'log_dir': t, 'new_log_cutoff': '0',
                    'source_filename_pattern': access_regex}
            uploader = MockLogUploader(conf)
            uploader.run_once()
            self.assertEquals(len(uploader.uploaded_files), 1)

    def test_create_container_fail(self):
        files = [datetime.now().strftime('%Y%m%d%H')]
        conf = {'source_filename_pattern': access_regex}
        with temptree(files) as t:
            conf['log_dir'] = t
            uploader = MockLogUploader(conf)
            uploader.run_once()
            self.assertEquals(len(uploader.uploaded_files), 1)

        with temptree(files) as t:
            conf['log_dir'] = t
            uploader = MockLogUploader(conf)
            # mock create_container to fail
            uploader.internal_proxy.create_container = lambda *args: False
            uploader.run_once()
            self.assertEquals(len(uploader.uploaded_files), 0)

    def test_unlink_log(self):
        files = [datetime.now().strftime('%Y%m%d%H')]
        with temptree(files, contents=[COMPRESSED_DATA]) as t:
            conf = {'log_dir': t, 'unlink_log': 'false',
                    'source_filename_pattern': access_regex}
            uploader = MockLogUploader(conf)
            uploader.run_once()
            self.assertEquals(len(uploader.uploaded_files), 1)
            # file still there
            self.assertEquals(len(os.listdir(t)), 1)

            conf = {'log_dir': t, 'unlink_log': 'true',
                    'source_filename_pattern': access_regex}
            uploader = MockLogUploader(conf)
            uploader.run_once()
            self.assertEquals(len(uploader.uploaded_files), 1)
            # file gone
            self.assertEquals(len(os.listdir(t)), 0)

    def test_upload_file_failed(self):
        files = ['plugin-%s' % datetime.now().strftime('%Y%m%d%H')]
        with temptree(files, contents=[COMPRESSED_DATA]) as t:
            conf = {'log_dir': t, 'unlink_log': 'true',
                    'source_filename_pattern': access_regex}
            uploader = MockLogUploader(conf)

            # mock upload_file to fail, and clean up mock
            def mock_upload_file(self, *args, **kwargs):
                uploader.uploaded_files.pop()
                return False
            uploader.internal_proxy.upload_file = mock_upload_file
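            # the pop() in mock_upload_file undoes the entry appended by
            # upload_one_log, so uploaded_files tracks successful uploads
            # only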
            self.assertRaises(SystemExit, uploader.run_once)
            # file still there
            self.assertEquals(len(os.listdir(t)), 1)


if __name__ == '__main__':
    unittest.main()
@@ -1,29 +0,0 @@
# Copyright (c) 2010-2011 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: Tests

import unittest
from swift.stats import stats_processor


class TestStatsProcessor(unittest.TestCase):

    def test_placeholder(self):
        pass


if __name__ == '__main__':
    unittest.main()