+++ /dev/null
-#ParseMaster (July 25 2005)\r
-# Based on "ParseMaster.js" by Dean Edwards <http://dean.edwards.name/>\r
-# Ported to Perl by Rob Seiler, ELR Software Pty Ltd <http://www.elr.com.au>\r
-# Copyright 2005. License <http://creativecommons.org/licenses/LGPL/2.1/>\r
-\r
-package ParseMaster;\r
-use strict;\r
-use Data::Dumper;\r
-\r
-# Package wide variable declarations\r
-use vars qw/$VERSION\r
- @_X_escaped @_X_patterns\r
- /;\r
-\r
-$VERSION = '017';\r
-\r
-# constants\r
-my $X_EXPRESSION = 0;\r
-my $X_REPLACEMENT = 1;\r
-my $X_LENGTH = 2;\r
-\r
-# re's used to determine nesting levels\r
-my $X_GROUPS = qr/\(/o; # NB: Requires g modifier!\r
-my $X_SUB_REPLACE = qr/\$\d/o;\r
-my $X_INDEXED = qr/^\$\d+$/o;\r
-my $XX_ESCAPE = qr/\\./o; # NB: Requires g modifier!\r
-my $XX_DELETED = qr/\001[^\001]*\001/o; # NB: Requires g modifier!\r
-my $DIGIT = qr/[^\D]/o; # Yep - this is a digit - contains no non-digits\r
-\r
-# Constructor\r
-sub new {\r
- my $class = shift;\r
- my $self = {};\r
- @_X_escaped = (); # Re-initialize global for each instance\r
- @_X_patterns = (); # Re-initialize global for each instance\r
- # Instance variables - access by similarly named set/get functions\r
- $self->{_ignoreCase_} = 0;\r
- $self->{_escapeChar_} = '';\r
- bless ($self, $class);\r
- return $self;\r
-}\r
-\r
-sub ignoreCase {\r
- my ($self, $value) = @_;\r
- if (defined($value)) {\r
- $self->{_ignoreCase_} = $value;\r
- }\r
- return $self->{_ignoreCase_};\r
-}\r
-\r
-sub escapeChar{\r
- my ($self, $value) = @_;\r
- if (defined($value)) {\r
- $self->{_escapeChar_} = $value;\r
- }\r
- return $self->{_escapeChar_};\r
-}\r
-\r
-#######################\r
-# Public Parsemaster functions\r
-\r
-my $X_DELETE = sub(@$) {\r
- my $X_offset = pop;\r
- my @X_match = @_;\r
- return (chr(001) . $X_match[$X_offset] . chr(001));\r
-}; # NB semicolon required for closure!\r
-\r
-# create and add a new pattern to the patterns collection\r
-sub add {\r
- my ($self, $expression, $X_replacement) = @_;\r
- if (!$X_replacement) {$X_replacement = $X_DELETE};\r
-\r
- # count the number of sub-expressions\r
- my $temp = &_X_internalEscape($expression);\r
- my $length = 1; # Always at least one because each pattern is itself a sub-expression\r
- $length += $temp =~ s/$X_GROUPS//g; # One way to count the left capturing parentheses in the regexp string\r
-\r
- # does the pattern deal with sub-expressions?\r
- if ((ref($X_replacement) ne "CODE") && ($X_replacement =~ m/$X_SUB_REPLACE/)) {\r
- if ($X_replacement =~ m/$X_INDEXED/) { # a simple lookup? (eg "$2")\r
- # store the index (used for fast retrieval of matched strings)\r
- $X_replacement = substr($X_replacement,1) - 1;\r
- }\r
- else { # a complicated lookup (eg "Hello $2 $1")\r
- my $i = $length;\r
- while ($i) { # Had difficulty getting Perl to do Dean's splitting and joining of strings containing $'s\r
- my $str = '$a[$o+' . ($i-1) . ']'; # eg $a[$o+1]\r
- $X_replacement =~ s/\$$i/$str/; # eg $2 $3 -> $a[$o+1] $a[$o+2]\r
- $i--;\r
- }\r
- # build a function to do the lookup - returns interpolated string of array lookups\r
- $X_replacement = eval('sub {my $o=pop; my @a=@_; return "' . $X_replacement . '"};');\r
- }\r
- }\r
- else {}\r
- # pass the modified arguments\r
- &_X_add($expression || q/^$/, $X_replacement, $length);\r
-}\r
-\r
-# execute the global replacement\r
-sub exec {\r
-#print Dumper(@_X_patterns);\r
- my ($self, $X_string) = @_;\r
- my $escChar = $self->escapeChar();\r
- my $ignoreCase = $self->ignoreCase();\r
- my ($regexp,$captures) = &_getPatterns(); # Concatenated and parenthesized regexp eg '(regex1)|(regex2)|(regex3)' etc\r
- $X_string = &_X_escape($X_string, $escChar);\r
- if ($ignoreCase) {$X_string =~ s/$regexp/{&_X_replacement(&_matchVars($captures,\$X_string))}/gie} # Pass $X_String as a\r
- else {$X_string =~ s/$regexp/{&_X_replacement(&_matchVars($captures,\$X_string))}/ge} # reference for speed\r
-\r
- $X_string = &_X_unescape($X_string, $escChar);\r
- $X_string =~ s/$XX_DELETED//g;\r
- return $X_string;\r
-}\r
-\r
-sub _X_add {\r
- push (@_X_patterns, [@_]); # Save each argument set as is into an array of arrays\r
-}\r
-\r
-# this is the global replace function (it's quite complicated)\r
-sub _X_replacement {\r
- my (@arguments) = @_;\r
-#print Dumper (@arguments);\r
- if ($arguments[0] le '') {return ''}\r
- # Dereference last index (source String) here - faster than in _matchVars (maybe not needed at all?)\r
- $arguments[$#arguments] = ${$arguments[$#arguments]};\r
- my $i = 1;\r
- # loop through the patterns\r
- for (my $j=0; $j<scalar(@_X_patterns); $j++) { # Loop through global all @_X_patterns\r
- my @X_pattern = @{$_X_patterns[$j]};\r
- # do we have a result? NB: "if ($arguments[$i])" as in Dean's Javascript is false for the value 0!!!\r
- if ((defined $arguments[$i]) && ($arguments[$i] gt '')) {\r
- my $X_replacement = $X_pattern[$X_REPLACEMENT];\r
- # switch on type of $replacement\r
- if (ref($X_replacement) eq "CODE") { # function\r
- return &$X_replacement(@arguments,$i);\r
- }\r
- elsif ($X_replacement =~ m/$DIGIT/) { # number (contains no non-digits)\r
- return $arguments[$X_replacement + $i];\r
- }\r
- else { # default\r
- return $X_replacement; # default\r
- }\r
- } # skip over references to sub-expressions\r
- else {$i += $X_pattern[$X_LENGTH]}\r
- }\r
-}\r
-\r
-#######################\r
-# Private functions\r
-#######################\r
-\r
-# encode escaped characters\r
-sub _X_escape {\r
- my ($X_string, $X_escapeChar) = @_;\r
- if ($X_escapeChar) {\r
- my $re = '\\'.$X_escapeChar.'(.)';\r
- $X_string =~ s/$re/{push(@_X_escaped,$1); $X_escapeChar}/ge;\r
- }\r
- return $X_string;\r
-}\r
-\r
-# decode escaped characters\r
-sub _X_unescape {\r
- my ($X_string, $X_escapeChar) = @_;\r
- if ($X_escapeChar) { # We'll only do this if there is an $X_escapeChar!\r
- my $re = '\\'.$X_escapeChar;\r
- $X_string =~ s/$re/{$X_escapeChar . (shift(@_X_escaped))}/ge; # Don't use Dean Edwards as below 'or' here - because zero will return ''!\r
- # $X_string =~ s/$re/{$X_escapeChar . (shift(@_X_escaped) || '')}/ge;\r
- }\r
- return $X_string;\r
-}\r
-\r
-sub _X_internalEscape {\r
- my ($string) = shift;\r
- $string =~ s/$XX_ESCAPE//g;\r
- return $string;\r
-}\r
-\r
-# Builds an array of match variables to (approximately) emulate that available in Javascript String.replace()\r
-sub _matchVars {\r
- my ($m,$sref) = @_;\r
- my @args = (1..$m); # establish the number potential memory variables\r
- my @mv = map {eval("\$$_")} @args; # matchvarv[1..m] = the memory variables $1 .. $m\r
- unshift (@mv, $&); # matchvar[0] = the substring that matched\r
- push (@mv, length($`)); # matchvar[m+1] = offset within the source string where the match occurred (= length of prematch string)\r
- push (@mv, $sref); # matchvar[m+2] = reference to full source string (dereference in caller if/when needed)\r
-#print Dumper (@mv);\r
- return @mv;\r
-}\r
-\r
-sub _getPatterns {\r
- my @Patterns = ();\r
- my $lcp = 0;\r
- for (my $i=0; $i<scalar(@_X_patterns); $i++) { # Loop through global all @_patterns\r
- push (@Patterns, $_X_patterns[$i][$X_EXPRESSION]); # accumulate the expressions\r
- $lcp += $_X_patterns[$i][$X_LENGTH]; # sum the left capturing parenthesis counts\r
- }\r
- my $str = "(" . join(')|(',@Patterns). ")"; # enclose each pattern in () separated by "|"\r
- return ($str, $lcp);\r
-}\r
-\r
-##################\r
-# END #\r
-##################\r
-1; # ParseMaster #\r
-##################\r