Ticket #3214: sql-statement-iterator.diff

File sql-statement-iterator.diff, 6.0 KB (added by shaunc <shaun@…>, 18 years ago)

patch, introduces 'sql-statement-iterator' which correctly deals with quotes

  • core/management.py

     
    66import os, re, shutil, sys, textwrap
    77from optparse import OptionParser
    88from django.utils import termcolors
     9from django.utils.sqltools import sql_statement_iterator
    910
    1011# For Python 2.3
    1112if not hasattr(__builtins__, 'set'):
     
    348349    for sql_file in sql_files:
    349350        if os.path.exists(sql_file):
    350351            fp = open(sql_file, 'U')
    351             for statement in statements.split(fp.read()):
     352            for statement in sql_statement_iterator( fp.read(), double_percent_signs = True ):
    352353                # Remove any comments from the file
    353354                statement = re.sub(r"--.*[\n\Z]", "", statement)
    354355                if statement.strip():
  • utils/sqltools.py

     
     1r"""
     2Contains tools to massage sql text.
     3
     4#>>> re.findall( r'(?<!\%)(\%)(?!\%)', ' %a%% ' )
     5
     6
     7"""
     8import re
     9
     10_single_or_stmtend = r"(?P<semi>\;)|(?P<comment>\-\-)|(?P<ccomment>\/\*)|(?P<endccomment>\*\/)|(?P<lineend>\n|\r$)|(?P<firstquo>^\')|(?P<escquo>\'\')|(?:(?P<midquo>\')(?!\'))"
     11_dollarq = r"(?P<dollarquo>\$\w*\$)"
     12_percent = r"(?<!\%)\%(?!\%)"
     13
     14_single_id_re = re.compile( _single_or_stmtend )
     15_id_re = re.compile( '|'.join( ( _single_or_stmtend, _dollarq ) ) )
     16_percent_re = re.compile( _percent )
     17
     18def sql_statement_iterator(
     19        script, dollar_quotes = True, double_percent_signs = False
     20        ):
     21    r"""
     22    Iterate through the statements in an sql script,
     23    while respecting the boundaries of strings and comments.
     24
     25    Supports normal single quotes, dollar_quotes (unless turned off).
     26    Supports normal sql comments, and c-style comments
     27    If 'double_percent_signs' is specified, standalone percent signs
     28    will be doubled. This is useful when no parameters are intended,
     29    as otherwise python db api treats them as substitution points.
     30
     31
     32    Test various quirks of quotes:
     33
     34    >>> def show( s, **kw ):
     35    ...     for line in sql_statement_iterator( s, **kw ):
     36    ...         print line
     37
     38    >>> show( 'this is a \'simple;\'; test' )
     39    this is a 'simple;';
     40     test
     41    >>> show( ';' )
     42    ;
     43    >>> show( 'let\'s test a $DOLLAR$ quoted; $DOLLAR$ string' )
     44    let's test a $DOLLAR$ quoted; $DOLLAR$ string
     45
     46    >>> show( 'testing $$ nested; $A$ quotes; \'with;\' $A$;$$ many; fake; semicolons;'  )
     47    testing $$ nested; $A$ quotes; 'with;' $A$;$$ many;
     48     fake;
     49     semicolons;
     50
     51    >>> show( '''nested misformed $$ quote's $$; dont; cause; problems;''' )
     52    nested misformed $$ quote's $$;
     53     dont;
     54     cause;
     55     problems;
     56
     57    A real function:
     58   
     59    >>> show( '''CREATE OR REPLACE FUNCTION serviceIsCurrent( srow client_service )
     60    ... RETURNS bool LANGUAGE PLPGSQL AS $BODY$
     61    ... BEGIN
     62    ...     RETURN srow.start <= current_date AND (
     63    ...         srow.end IS NULL OR srow.end >= current_date ) AND
     64    ...         srow."_superceededBy_id" IS NULL;
     65    ... END;
     66    ... $BODY$;
     67    ...  ''' )
     68    CREATE OR REPLACE FUNCTION serviceIsCurrent( srow client_service )
     69    RETURNS bool LANGUAGE PLPGSQL AS $BODY$
     70    BEGIN
     71        RETURN srow.start <= current_date AND (
     72            srow.end IS NULL OR srow.end >= current_date ) AND
     73            srow."_superceededBy_id" IS NULL;
     74    END;
     75    $BODY$;
     76    <BLANKLINE>
     77    <BLANKLINE>
     78   
     79    Test ''
     80   
     81#    >>> show( "that''s it ';' ." )
     82    that''s it ';' .
     83    >>> show( "a''''b" )
     84    a''''b
     85
     86    Now test percent doubling
     87   
     88    >>> show( "hello there %1 %2 %%; %" )
     89    hello there %%1 %%2 %%;
     90     %%
     91
     92    Test w/o dollar quote:
     93   
     94    >>> show( "$$ d;d $$", dollar_quotes = False )
     95    $$ d;
     96    d $$
     97
     98    Test comments:
     99
     100    >>> show( "this is a line; -- this; doesn't; break\n'now; quote was in comment" )
     101    this is a line;
     102     -- this; doesn't; break
     103    'now; quote was in comment
     104
     105    >>> show( "/* ; a\n -- ; */;z b\n;c" )
     106    /* ; a
     107     -- ; */;
     108    z b
     109    ;
     110    c
     111
     112    """
     113    outer_quote = None
     114    is_comment = False
     115    is_c_comment = False
     116    if dollar_quotes:
     117        re = _id_re
     118    else:
     119        re = _single_id_re
     120
     121    lastPos = 0
     122    for match in re.finditer( script ):
     123        if match.group( 'semi' ):
     124            if not ( outer_quote or is_comment or is_c_comment ):
     125                newPos = match.end( 1 )
     126               
     127                statement = script[ lastPos : newPos ]
     128                yield _percent_re.sub( '%%', statement )
     129                lastPos = newPos
     130        elif match.group( 'comment' ) and not is_c_comment:
     131            if not outer_quote:
     132                is_comment = True
     133        elif match.group( 'ccomment' ) and not is_comment:
     134            if not outer_quote:
     135                is_c_comment = True
     136        elif match.group( 'lineend' ):
     137            is_comment = False
     138        elif match.group( 'endccomment' ):
     139            is_c_comment = False
     140        elif not ( is_comment or is_c_comment ):
     141            quote = filter( lambda g: g is not None, match.groups() )[ 0 ]
     142            if quote == "''":
     143                # double-quote is escape for quote
     144                continue
     145            elif outer_quote and quote == outer_quote:
     146                # strings embedded in other strings needn't be
     147                # well-formed -- throw away nesting if outer quote
     148                # is found
     149                outer_quote = None
     150            elif not outer_quote:
     151                outer_quote = quote
     152               
     153
     154    if lastPos < len( script ):
     155        yield _percent_re.sub( '%%', script[ lastPos : ] )
     156
     157
     158   
     159   
     160   
     161def _test():
     162    import doctest
     163    doctest.testmod()
     164
     165if __name__ == "__main__":
     166    _test()
Back to Top