Make link URL matching smarter
Ignore-this: bff39c43073f3576e31bda52b7383e6b - Link URL matching no longer requires the URL to be at the beginning of the line - Includes test case and documentation fix. darcs-hash:20101227093449-82ea9-727739b99ac86256216350b90dc3e710ea181f51.gz
This commit is contained in:
parent
11dcb55c4b
commit
a7c62223ea
@ -169,21 +169,19 @@ adjust and have MeetBot re-process the logs later.
|
|||||||
release.
|
release.
|
||||||
|
|
||||||
#link
|
#link
|
||||||
Add a link to the minutes. The URL must be the first thing on the
|
|
||||||
line, separated by a space from the rest of the line, and it will be
|
Add a link to the minutes. The URL will be properly detected within
|
||||||
properly hyperlinked. This command is automatically detected if the line
|
the line in most cases - the URL can't contain spaces. This command
|
||||||
starts with http:, https:, mailto:, and some other common protocols
|
is automatically detected if the line starts with http:, https:,
|
||||||
defined in the ``UrlProtocols`` configuration variable. Examples::
|
mailto:, and some other common protocols defined in the
|
||||||
|
``UrlProtocols`` configuration variable. Examples::
|
||||||
|
|
||||||
< MrBeige> #link http://wiki.debian.org/MeetBot/ is the main page
|
< MrBeige> #link http://wiki.debian.org/MeetBot/ is the main page
|
||||||
< MrBeige> http://wiki.debian.org/MeetBot/ is the main page
|
< MrBeige> http://wiki.debian.org/MeetBot/ is the main page
|
||||||
|
|
||||||
Both of these two examples are equivalent, and will hyperlink
|
|
||||||
properly. The first example below won't hyperlink properly, the
|
|
||||||
second one won't be automatically detected::
|
|
||||||
|
|
||||||
< MrBeige> #link the main page is http://wiki.debian.org/MeetBot/
|
< MrBeige> #link the main page is http://wiki.debian.org/MeetBot/
|
||||||
< MrBeige> the main page is http://wiki.debian.org/MeetBot/
|
so go there
|
||||||
|
< MrBeige> the main page is http://wiki.debian.org/MeetBot/ so go
|
||||||
|
there. (This will NOT be detected automatically)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,6 +30,7 @@
|
|||||||
###
|
###
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import writers
|
import writers
|
||||||
@ -73,10 +74,9 @@ class _BaseItem(object):
|
|||||||
replacements[name] = getattr(self, name)
|
replacements[name] = getattr(self, name)
|
||||||
replacements['nick'] = escapewith(replacements['nick'])
|
replacements['nick'] = escapewith(replacements['nick'])
|
||||||
replacements['link'] = self.logURL(M)
|
replacements['link'] = self.logURL(M)
|
||||||
if 'line' in replacements:
|
for key in ('line', 'prefix', 'suffix', 'topic'):
|
||||||
replacements['line'] = escapewith(replacements['line'])
|
if key in replacements:
|
||||||
if 'topic' in replacements:
|
replacements[key] = escapewith(replacements[key])
|
||||||
replacements['topic'] = escapewith(replacements['topic'])
|
|
||||||
if 'url' in replacements:
|
if 'url' in replacements:
|
||||||
replacements['url_quoteescaped'] = \
|
replacements['url_quoteescaped'] = \
|
||||||
escapewith(self.url.replace('"', "%22"))
|
escapewith(self.url.replace('"', "%22"))
|
||||||
@ -228,24 +228,40 @@ class Rejected(GenericItem):
|
|||||||
class Link(_BaseItem):
|
class Link(_BaseItem):
|
||||||
itemtype = 'LINK'
|
itemtype = 'LINK'
|
||||||
html_template = """<tr><td><a href='%(link)s#%(anchor)s'>%(time)s</a></td>
|
html_template = """<tr><td><a href='%(link)s#%(anchor)s'>%(time)s</a></td>
|
||||||
<td>%(itemtype)s</td><td>%(nick)s</td><td>%(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s</td>
|
<td>%(itemtype)s</td><td>%(nick)s</td><td>%(starthtml)s%(prefix)s<a href="%(url)s">%(url_readable)s</a>%(suffix)s%(endhtml)s</td>
|
||||||
</tr>"""
|
</tr>"""
|
||||||
#html2_template = ("""<i>%(itemtype)s</i>: %(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s """
|
html2_template = ("""%(starthtml)s%(prefix)s<a href="%(url)s">%(url_readable)s</a>%(suffix)s%(endhtml)s """
|
||||||
# """(%(nick)s, <a href='%(link)s#%(anchor)s'>%(time)s</a>)""")
|
|
||||||
#html2_template = ("""<i>%(itemtype)s</i>: %(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s """
|
|
||||||
# """(<a href='%(link)s#%(anchor)s'>%(nick)s</a>, %(time)s)""")
|
|
||||||
html2_template = ("""%(starthtml)s<a href="%(url)s">%(url_readable)s</a> %(line)s%(endhtml)s """
|
|
||||||
"""<span class="details">"""
|
"""<span class="details">"""
|
||||||
"""(<a href='%(link)s#%(anchor)s'>%(nick)s</a>, """
|
"""(<a href='%(link)s#%(anchor)s'>%(nick)s</a>, """
|
||||||
"""%(time)s)"""
|
"""%(time)s)"""
|
||||||
"""</span>""")
|
"""</span>""")
|
||||||
rst_template = """*%(itemtype)s*: %(startrst)s%(url)s %(line)s%(endrst)s (%(rstref)s_)"""
|
rst_template = """*%(itemtype)s*: %(startrst)s%(prefix)s%(url)s%(suffix)s%(endrst)s (%(rstref)s_)"""
|
||||||
text_template = """%(itemtype)s: %(starttext)s%(url)s %(line)s%(endtext)s (%(nick)s, %(time)s)"""
|
text_template = """%(itemtype)s: %(starttext)s%(prefix)s%(url)s%(suffix)s%(endtext)s (%(nick)s, %(time)s)"""
|
||||||
mw_template = """''%(itemtype)s:'' %(startmw)s%(url)s %(line)s%(endmw)s (%(nick)s, %(time)s)"""
|
mw_template = """''%(itemtype)s:'' %(startmw)s%(prefix)s%(url)s%(suffix)s%(endmw)s (%(nick)s, %(time)s)"""
|
||||||
def __init__(self, nick, line, linenum, time_):
|
def __init__(self, nick, line, linenum, time_, M):
|
||||||
self.nick = nick ; self.linenum = linenum
|
self.nick = nick ; self.linenum = linenum
|
||||||
self.time = time.strftime("%H:%M:%S", time_)
|
self.time = time.strftime("%H:%M:%S", time_)
|
||||||
self.url, self.line = (line+' ').split(' ', 1)
|
self.line = line
|
||||||
|
|
||||||
|
protocols = M.config.UrlProtocols
|
||||||
|
protocols = '|'.join(re.escape(p) for p in protocols)
|
||||||
|
protocols = '(?:'+protocols+')'
|
||||||
|
# This is gross.
|
||||||
|
# (.*?) - any prefix, non-greedy
|
||||||
|
# (%s//[^\s]+ - protocol://... until the next space
|
||||||
|
# (?<!\.|\)) - but the last character can NOT be . or )
|
||||||
|
# (.*) - any suffix
|
||||||
|
url_re = re.compile(r'(.*?)(%s//[^\s]+(?<!\.|\)))(.*)'%protocols)
|
||||||
|
m = url_re.match(line)
|
||||||
|
if m:
|
||||||
|
self.prefix = m.group(1)
|
||||||
|
self.url = m.group(2)
|
||||||
|
self.suffix = m.group(3)
|
||||||
|
else:
|
||||||
|
# simple matching, the old way.
|
||||||
|
self.url, self.suffix = (line+' ').split(' ', 1)
|
||||||
|
self.suffix = ' '+self.suffix
|
||||||
|
self.prefix = ''
|
||||||
# URL-sanitization
|
# URL-sanitization
|
||||||
self.url_readable = self.url # readable line version
|
self.url_readable = self.url # readable line version
|
||||||
self.url = self.url
|
self.url = self.url
|
||||||
|
@ -448,7 +448,7 @@ class MeetingCommands(object):
|
|||||||
self.addnick(nick, lines=0)
|
self.addnick(nick, lines=0)
|
||||||
def do_link(self, **kwargs):
|
def do_link(self, **kwargs):
|
||||||
"""Add informational item to the minutes."""
|
"""Add informational item to the minutes."""
|
||||||
m = items.Link(**kwargs)
|
m = items.Link(M=self, **kwargs)
|
||||||
self.additem(m)
|
self.additem(m)
|
||||||
def do_commands(self, **kwargs):
|
def do_commands(self, **kwargs):
|
||||||
commands = [ "#"+x[3:] for x in dir(self) if x[:3]=="do_" ]
|
commands = [ "#"+x[3:] for x in dir(self) if x[:3]=="do_" ]
|
||||||
|
@ -222,6 +222,32 @@ class MeetBotTest(unittest.TestCase):
|
|||||||
results, re.IGNORECASE), \
|
results, re.IGNORECASE), \
|
||||||
"Nick full-word matching failed"
|
"Nick full-word matching failed"
|
||||||
|
|
||||||
|
def test_urlMatching(self):
|
||||||
|
"""Test proper detection of URLs in lines
|
||||||
|
"""
|
||||||
|
script = """
|
||||||
|
20:13:50 <x> #startmeeting
|
||||||
|
20:13:50 <x> #link prefix http://site1.com suffix
|
||||||
|
20:13:50 <x> http://site2.com suffix
|
||||||
|
20:13:50 <x> ftp://ftpsite1.com suffix
|
||||||
|
20:13:50 <x> #link prefix ftp://ftpsite2.com suffix
|
||||||
|
20:13:50 <x> irc://ircsite1.com suffix
|
||||||
|
20:13:50 <x> mailto://a@mail.com suffix
|
||||||
|
20:13:50 <x> #endmeeting
|
||||||
|
"""
|
||||||
|
M = process_meeting(script)
|
||||||
|
results = M.save()['.html']
|
||||||
|
assert re.search(r'prefix.*href.*http://site1.com.*suffix',
|
||||||
|
results), "URL missing 1"
|
||||||
|
assert re.search(r'href.*http://site2.com.*suffix',
|
||||||
|
results), "URL missing 2"
|
||||||
|
assert re.search(r'href.*ftp://ftpsite1.com.*suffix',
|
||||||
|
results), "URL missing 3"
|
||||||
|
assert re.search(r'prefix.*href.*ftp://ftpsite2.com.*suffix',
|
||||||
|
results), "URL missing 4"
|
||||||
|
assert re.search(r'href.*mailto://a@mail.com.*suffix',
|
||||||
|
results), "URL missing 5"
|
||||||
|
|
||||||
def t_css(self):
|
def t_css(self):
|
||||||
"""Runs all CSS-related tests.
|
"""Runs all CSS-related tests.
|
||||||
"""
|
"""
|
||||||
|
@ -64,7 +64,15 @@
|
|||||||
# links
|
# links
|
||||||
20:13:50 <MrBeige> #topic Links
|
20:13:50 <MrBeige> #topic Links
|
||||||
20:13:50 <Utahraptor> #link http://test<b>.zgib.net
|
20:13:50 <Utahraptor> #link http://test<b>.zgib.net
|
||||||
|
20:13:50 <Utahraptor> #link ftp://test<b>.zgib.net "
|
||||||
|
20:13:50 <Utahraptor> #link mailto://a@bla"h.com
|
||||||
20:13:50 <Utahraptor> #link http://test.zgib.net/&testpage
|
20:13:50 <Utahraptor> #link http://test.zgib.net/&testpage
|
||||||
|
20:13:50 <Utahraptor> #link prefix http://test.zgib.net/&testpage suffix
|
||||||
|
20:13:50 <Utahraptor> #link prefix ftp://test.zg"ib.net/&testpage suffix
|
||||||
|
20:13:50 <Utahraptor> #link prefix mailto://a@blah.com&testpage suffix
|
||||||
|
20:13:50 <Utahraptor> #link prefix http://google.com/. suffix
|
||||||
|
20:13:50 <Utahraptor> #link prefix (http://google.com/) suffix
|
||||||
|
|
||||||
|
|
||||||
# accents
|
# accents
|
||||||
20:13:50 <MrBeige> #topic Character sets
|
20:13:50 <MrBeige> #topic Character sets
|
||||||
|
Loading…
Reference in New Issue
Block a user