OpenStack library for messaging
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

515 lines
19KB

  1. # Copyright 2013 Red Hat, Inc.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License"); you may
  4. # not use this file except in compliance with the License. You may obtain
  5. # a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  11. # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  12. # License for the specific language governing permissions and limitations
  13. # under the License.
  14. __all__ = ['AMQPDriverBase']
  15. import logging
  16. import threading
  17. import time
  18. import uuid
  19. import cachetools
  20. from oslo_utils import timeutils
  21. from six import moves
  22. import oslo_messaging
  23. from oslo_messaging._drivers import amqp as rpc_amqp
  24. from oslo_messaging._drivers import base
  25. from oslo_messaging._drivers import common as rpc_common
  26. from oslo_messaging._i18n import _
  27. from oslo_messaging._i18n import _LE
  28. from oslo_messaging._i18n import _LI
  29. from oslo_messaging._i18n import _LW
  30. LOG = logging.getLogger(__name__)
  31. class AMQPIncomingMessage(base.RpcIncomingMessage):
  32. def __init__(self, listener, ctxt, message, unique_id, msg_id, reply_q,
  33. obsolete_reply_queues):
  34. super(AMQPIncomingMessage, self).__init__(ctxt, message)
  35. self.listener = listener
  36. self.unique_id = unique_id
  37. self.msg_id = msg_id
  38. self.reply_q = reply_q
  39. self._obsolete_reply_queues = obsolete_reply_queues
  40. self.stopwatch = timeutils.StopWatch()
  41. self.stopwatch.start()
  42. def _send_reply(self, conn, reply=None, failure=None, log_failure=True):
  43. if not self._obsolete_reply_queues.reply_q_valid(self.reply_q,
  44. self.msg_id):
  45. return
  46. if failure:
  47. failure = rpc_common.serialize_remote_exception(failure,
  48. log_failure)
  49. # NOTE(sileht): ending can be removed in N*, see Listener.wait()
  50. # for more detail.
  51. msg = {'result': reply, 'failure': failure, 'ending': True,
  52. '_msg_id': self.msg_id}
  53. rpc_amqp._add_unique_id(msg)
  54. unique_id = msg[rpc_amqp.UNIQUE_ID]
  55. LOG.debug("sending reply msg_id: %(msg_id)s "
  56. "reply queue: %(reply_q)s "
  57. "time elapsed: %(elapsed)ss", {
  58. 'msg_id': self.msg_id,
  59. 'unique_id': unique_id,
  60. 'reply_q': self.reply_q,
  61. 'elapsed': self.stopwatch.elapsed()})
  62. conn.direct_send(self.reply_q, rpc_common.serialize_msg(msg))
  63. def reply(self, reply=None, failure=None, log_failure=True):
  64. if not self.msg_id:
  65. # NOTE(Alexei_987) not sending reply, if msg_id is empty
  66. # because reply should not be expected by caller side
  67. return
  68. # NOTE(sileht): return without hold the a connection if possible
  69. if not self._obsolete_reply_queues.reply_q_valid(self.reply_q,
  70. self.msg_id):
  71. return
  72. # NOTE(sileht): we read the configuration value from the driver
  73. # to be able to backport this change in previous version that
  74. # still have the qpid driver
  75. duration = self.listener.driver.missing_destination_retry_timeout
  76. timer = rpc_common.DecayingTimer(duration=duration)
  77. timer.start()
  78. while True:
  79. try:
  80. with self.listener.driver._get_connection(
  81. rpc_common.PURPOSE_SEND) as conn:
  82. self._send_reply(conn, reply, failure,
  83. log_failure=log_failure)
  84. return
  85. except rpc_amqp.AMQPDestinationNotFound:
  86. if timer.check_return() > 0:
  87. LOG.debug(("The reply %(msg_id)s cannot be sent "
  88. "%(reply_q)s reply queue don't exist, "
  89. "retrying..."), {
  90. 'msg_id': self.msg_id,
  91. 'reply_q': self.reply_q})
  92. time.sleep(0.25)
  93. else:
  94. self._obsolete_reply_queues.add(self.reply_q, self.msg_id)
  95. LOG.info(_LI("The reply %(msg_id)s cannot be sent "
  96. "%(reply_q)s reply queue don't exist after "
  97. "%(duration)s sec abandoning..."), {
  98. 'msg_id': self.msg_id,
  99. 'reply_q': self.reply_q,
  100. 'duration': duration})
  101. return
  102. def acknowledge(self):
  103. self.message.acknowledge()
  104. self.listener.msg_id_cache.add(self.unique_id)
  105. def requeue(self):
  106. # NOTE(sileht): In case of the connection is lost between receiving the
  107. # message and requeing it, this requeue call fail
  108. # but because the message is not acknowledged and not added to the
  109. # msg_id_cache, the message will be reconsumed, the only difference is
  110. # the message stay at the beginning of the queue instead of moving to
  111. # the end.
  112. self.message.requeue()
  113. class ObsoleteReplyQueuesCache(object):
  114. """Cache of reply queue id that doesn't exists anymore.
  115. NOTE(sileht): In case of a broker restart/failover
  116. a reply queue can be unreachable for short period
  117. the IncomingMessage.send_reply will block for 60 seconds
  118. in this case or until rabbit recovers.
  119. But in case of the reply queue is unreachable because the
  120. rpc client is really gone, we can have a ton of reply to send
  121. waiting 60 seconds.
  122. This leads to a starvation of connection of the pool
  123. The rpc server take to much time to send reply, other rpc client will
  124. raise TimeoutError because their don't receive their replies in time.
  125. This object cache stores already known gone client to not wait 60 seconds
  126. and hold a connection of the pool.
  127. Keeping 200 last gone rpc client for 1 minute is enough
  128. and doesn't hold to much memory.
  129. """
  130. SIZE = 200
  131. TTL = 60
  132. def __init__(self):
  133. self._lock = threading.RLock()
  134. self._cache = cachetools.TTLCache(self.SIZE, self.TTL)
  135. def reply_q_valid(self, reply_q, msg_id):
  136. if reply_q in self._cache:
  137. self._no_reply_log(reply_q, msg_id)
  138. return False
  139. return True
  140. def add(self, reply_q, msg_id):
  141. with self._lock:
  142. self._cache.update({reply_q: msg_id})
  143. self._no_reply_log(reply_q, msg_id)
  144. def _no_reply_log(self, reply_q, msg_id):
  145. LOG.warning(_LW("%(reply_queue)s doesn't exists, drop reply to "
  146. "%(msg_id)s"), {'reply_queue': reply_q,
  147. 'msg_id': msg_id})
  148. class AMQPListener(base.Listener):
  149. def __init__(self, driver, conn):
  150. super(AMQPListener, self).__init__(driver.prefetch_size)
  151. self.driver = driver
  152. self.conn = conn
  153. self.msg_id_cache = rpc_amqp._MsgIdCache()
  154. self.incoming = []
  155. self._stopped = threading.Event()
  156. self._obsolete_reply_queues = ObsoleteReplyQueuesCache()
  157. def __call__(self, message):
  158. ctxt = rpc_amqp.unpack_context(message)
  159. # FIXME(sileht): Don't log the message until strutils is more
  160. # efficient, (rpc_amqp.unpack_context already log the context)
  161. # LOG.debug(u'received: %s',
  162. # strutils.mask_password(six.text_type(dict(message))))
  163. unique_id = self.msg_id_cache.check_duplicate_message(message)
  164. LOG.debug("received message msg_id: %(msg_id)s reply to %(queue)s", {
  165. 'queue': ctxt.reply_q, 'msg_id': ctxt.msg_id})
  166. self.incoming.append(AMQPIncomingMessage(self,
  167. ctxt.to_dict(),
  168. message,
  169. unique_id,
  170. ctxt.msg_id,
  171. ctxt.reply_q,
  172. self._obsolete_reply_queues))
  173. @base.batch_poll_helper
  174. def poll(self, timeout=None):
  175. while not self._stopped.is_set():
  176. if self.incoming:
  177. return self.incoming.pop(0)
  178. try:
  179. self.conn.consume(timeout=timeout)
  180. except rpc_common.Timeout:
  181. return None
  182. def stop(self):
  183. self._stopped.set()
  184. self.conn.stop_consuming()
  185. def cleanup(self):
  186. # Closes listener connection
  187. self.conn.close()
  188. class ReplyWaiters(object):
  189. WAKE_UP = object()
  190. def __init__(self):
  191. self._queues = {}
  192. self._wrn_threshold = 10
  193. def get(self, msg_id, timeout):
  194. try:
  195. return self._queues[msg_id].get(block=True, timeout=timeout)
  196. except moves.queue.Empty:
  197. raise oslo_messaging.MessagingTimeout(
  198. 'Timed out waiting for a reply '
  199. 'to message ID %s' % msg_id)
  200. def put(self, msg_id, message_data):
  201. queue = self._queues.get(msg_id)
  202. if not queue:
  203. LOG.info(_LI('No calling threads waiting for msg_id : %s'), msg_id)
  204. LOG.debug(' queues: %(queues)s, message: %(message)s',
  205. {'queues': len(self._queues), 'message': message_data})
  206. else:
  207. queue.put(message_data)
  208. def add(self, msg_id):
  209. self._queues[msg_id] = moves.queue.Queue()
  210. if len(self._queues) > self._wrn_threshold:
  211. LOG.warning(_LW('Number of call queues is greater than warning '
  212. 'threshold: %(old_threshold)s. There could be a '
  213. 'leak. Increasing threshold to: %(threshold)s'),
  214. {'old_threshold': self._wrn_threshold,
  215. 'threshold': self._wrn_threshold * 2})
  216. self._wrn_threshold *= 2
  217. def remove(self, msg_id):
  218. del self._queues[msg_id]
  219. class ReplyWaiter(object):
  220. def __init__(self, reply_q, conn, allowed_remote_exmods):
  221. self.conn = conn
  222. self.allowed_remote_exmods = allowed_remote_exmods
  223. self.msg_id_cache = rpc_amqp._MsgIdCache()
  224. self.waiters = ReplyWaiters()
  225. self.conn.declare_direct_consumer(reply_q, self)
  226. self._thread_exit_event = threading.Event()
  227. self._thread = threading.Thread(target=self.poll)
  228. self._thread.daemon = True
  229. self._thread.start()
  230. def stop(self):
  231. if self._thread:
  232. self._thread_exit_event.set()
  233. self.conn.stop_consuming()
  234. self._thread.join()
  235. self._thread = None
  236. def poll(self):
  237. while not self._thread_exit_event.is_set():
  238. try:
  239. self.conn.consume()
  240. except Exception:
  241. LOG.exception(_LE("Failed to process incoming message, "
  242. "retrying..."))
  243. def __call__(self, message):
  244. message.acknowledge()
  245. incoming_msg_id = message.pop('_msg_id', None)
  246. if message.get('ending'):
  247. LOG.debug("received reply msg_id: %s", incoming_msg_id)
  248. self.waiters.put(incoming_msg_id, message)
  249. def listen(self, msg_id):
  250. self.waiters.add(msg_id)
  251. def unlisten(self, msg_id):
  252. self.waiters.remove(msg_id)
  253. @staticmethod
  254. def _raise_timeout_exception(msg_id):
  255. raise oslo_messaging.MessagingTimeout(
  256. _('Timed out waiting for a reply to message ID %s.') % msg_id)
  257. def _process_reply(self, data):
  258. self.msg_id_cache.check_duplicate_message(data)
  259. if data['failure']:
  260. failure = data['failure']
  261. result = rpc_common.deserialize_remote_exception(
  262. failure, self.allowed_remote_exmods)
  263. else:
  264. result = data.get('result', None)
  265. ending = data.get('ending', False)
  266. return result, ending
  267. def wait(self, msg_id, timeout):
  268. # NOTE(sileht): for each msg_id we receive two amqp message
  269. # first one with the payload, a second one to ensure the other
  270. # have finish to send the payload
  271. # NOTE(viktors): We are going to remove this behavior in the N
  272. # release, but we need to keep backward compatibility, so we should
  273. # support both cases for now.
  274. timer = rpc_common.DecayingTimer(duration=timeout)
  275. timer.start()
  276. final_reply = None
  277. ending = False
  278. while not ending:
  279. timeout = timer.check_return(self._raise_timeout_exception, msg_id)
  280. try:
  281. message = self.waiters.get(msg_id, timeout=timeout)
  282. except moves.queue.Empty:
  283. self._raise_timeout_exception(msg_id)
  284. reply, ending = self._process_reply(message)
  285. if reply is not None:
  286. # NOTE(viktors): This can be either first _send_reply() with an
  287. # empty `result` field or a second _send_reply() with
  288. # ending=True and no `result` field.
  289. final_reply = reply
  290. return final_reply
  291. class AMQPDriverBase(base.BaseDriver):
  292. missing_destination_retry_timeout = 0
  293. def __init__(self, conf, url, connection_pool,
  294. default_exchange=None, allowed_remote_exmods=None):
  295. super(AMQPDriverBase, self).__init__(conf, url, default_exchange,
  296. allowed_remote_exmods)
  297. self._default_exchange = default_exchange
  298. self._connection_pool = connection_pool
  299. self._reply_q_lock = threading.Lock()
  300. self._reply_q = None
  301. self._reply_q_conn = None
  302. self._waiter = None
  303. def _get_exchange(self, target):
  304. return target.exchange or self._default_exchange
  305. def _get_connection(self, purpose=rpc_common.PURPOSE_SEND):
  306. return rpc_common.ConnectionContext(self._connection_pool,
  307. purpose=purpose)
  308. def _get_reply_q(self):
  309. with self._reply_q_lock:
  310. if self._reply_q is not None:
  311. return self._reply_q
  312. reply_q = 'reply_' + uuid.uuid4().hex
  313. conn = self._get_connection(rpc_common.PURPOSE_LISTEN)
  314. self._waiter = ReplyWaiter(reply_q, conn,
  315. self._allowed_remote_exmods)
  316. self._reply_q = reply_q
  317. self._reply_q_conn = conn
  318. return self._reply_q
  319. def _send(self, target, ctxt, message,
  320. wait_for_reply=None, timeout=None,
  321. envelope=True, notify=False, retry=None):
  322. # FIXME(markmc): remove this temporary hack
  323. class Context(object):
  324. def __init__(self, d):
  325. self.d = d
  326. def to_dict(self):
  327. return self.d
  328. context = Context(ctxt)
  329. msg = message
  330. if wait_for_reply:
  331. msg_id = uuid.uuid4().hex
  332. msg.update({'_msg_id': msg_id})
  333. msg.update({'_reply_q': self._get_reply_q()})
  334. rpc_amqp._add_unique_id(msg)
  335. unique_id = msg[rpc_amqp.UNIQUE_ID]
  336. rpc_amqp.pack_context(msg, context)
  337. if envelope:
  338. msg = rpc_common.serialize_msg(msg)
  339. if wait_for_reply:
  340. self._waiter.listen(msg_id)
  341. log_msg = "CALL msg_id: %s " % msg_id
  342. else:
  343. log_msg = "CAST unique_id: %s " % unique_id
  344. try:
  345. with self._get_connection(rpc_common.PURPOSE_SEND) as conn:
  346. if notify:
  347. exchange = self._get_exchange(target)
  348. log_msg += "NOTIFY exchange '%(exchange)s'" \
  349. " topic '%(topic)s'" % {
  350. 'exchange': exchange,
  351. 'topic': target.topic}
  352. LOG.debug(log_msg)
  353. conn.notify_send(exchange, target.topic, msg, retry=retry)
  354. elif target.fanout:
  355. log_msg += "FANOUT topic '%(topic)s'" % {
  356. 'topic': target.topic}
  357. LOG.debug(log_msg)
  358. conn.fanout_send(target.topic, msg, retry=retry)
  359. else:
  360. topic = target.topic
  361. exchange = self._get_exchange(target)
  362. if target.server:
  363. topic = '%s.%s' % (target.topic, target.server)
  364. log_msg += "exchange '%(exchange)s'" \
  365. " topic '%(topic)s'" % {
  366. 'exchange': exchange,
  367. 'topic': target.topic}
  368. LOG.debug(log_msg)
  369. conn.topic_send(exchange_name=exchange, topic=topic,
  370. msg=msg, timeout=timeout, retry=retry)
  371. if wait_for_reply:
  372. result = self._waiter.wait(msg_id, timeout)
  373. if isinstance(result, Exception):
  374. raise result
  375. return result
  376. finally:
  377. if wait_for_reply:
  378. self._waiter.unlisten(msg_id)
  379. def send(self, target, ctxt, message, wait_for_reply=None, timeout=None,
  380. retry=None):
  381. return self._send(target, ctxt, message, wait_for_reply, timeout,
  382. retry=retry)
  383. def send_notification(self, target, ctxt, message, version, retry=None):
  384. return self._send(target, ctxt, message,
  385. envelope=(version == 2.0), notify=True, retry=retry)
  386. def listen(self, target):
  387. conn = self._get_connection(rpc_common.PURPOSE_LISTEN)
  388. listener = AMQPListener(self, conn)
  389. conn.declare_topic_consumer(exchange_name=self._get_exchange(target),
  390. topic=target.topic,
  391. callback=listener)
  392. conn.declare_topic_consumer(exchange_name=self._get_exchange(target),
  393. topic='%s.%s' % (target.topic,
  394. target.server),
  395. callback=listener)
  396. conn.declare_fanout_consumer(target.topic, listener)
  397. return listener
  398. def listen_for_notifications(self, targets_and_priorities, pool):
  399. conn = self._get_connection(rpc_common.PURPOSE_LISTEN)
  400. listener = AMQPListener(self, conn)
  401. for target, priority in targets_and_priorities:
  402. conn.declare_topic_consumer(
  403. exchange_name=self._get_exchange(target),
  404. topic='%s.%s' % (target.topic, priority),
  405. callback=listener, queue_name=pool)
  406. return listener
  407. def cleanup(self):
  408. if self._connection_pool:
  409. self._connection_pool.empty()
  410. self._connection_pool = None
  411. with self._reply_q_lock:
  412. if self._reply_q is not None:
  413. self._waiter.stop()
  414. self._reply_q_conn.close()
  415. self._reply_q_conn = None
  416. self._reply_q = None
  417. self._waiter = None