# Tokenizes the scanner's current input as PHP (optionally embedded in HTML)
# and appends the result to +tokens+.
#
# Tokens are two-element arrays: <tt>[text, kind]</tt> for text, plus
# <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt> markers that bracket
# string literals (+:string+) and interpolated expressions (+:inline+).
# Text outside of PHP is not appended here; it is passed to
# <tt>@html_scanner.tokenize</tt>.
#
# The scanner is a stack machine. +states+ holds, bottom to top:
# * +:initial+            - outside PHP; always the bottom entry
# * +:php+                - PHP code; pushed again for every <tt>{</tt>
# * +:sqstring+           - single-quoted string or <tt><<<'ID'</tt> block
# * +:dqstring+           - double-quoted / backtick string or heredoc
# * <tt>[state, delim]</tt> - a string state suspended by <tt>{$</tt>,
#   together with the delimiter to restore when the matching <tt>}</tt> is seen
# * +:class_expected+, +:function_expected+ - right after those keywords
#
# tokens::  Array-like collector; only <tt><<</tt> is called on it here.
# options:: Accepted for interface compatibility; not read by this method.
#
# Returns +tokens+. Every +:open+ marker emitted here is matched by a
# +:close+ marker, even if the input ends (or <tt>?></tt> occurs) in the
# middle of a string.
def scan_tokens tokens, options
  # Scan raw bytes: anything that cannot be represented is replaced by '?'.
  # The respond_to? guard keeps this working on Rubies without encodings.
  if string.respond_to?(:encoding) && string.encoding != Encoding::ASCII_8BIT
    self.string = string.encode Encoding::ASCII_8BIT,
      :invalid => :replace, :undef => :replace, :replace => '?'
  end

  # Decide whether the input starts in HTML or directly in PHP code.
  if check(RE::PHP_START) ||
     (match?(/\s*<\S/) && exist?(RE::PHP_START)) ||
     exist?(RE::HTML_INDICATOR) ||
     check(/.{1,100}#{RE::PHP_START}/om)
    # Looks like HTML with embedded PHP: start outside of PHP.
    states = [:initial]
  else
    # No sign of HTML: treat the whole input as PHP code.
    states = [:initial, :php]
  end

  label_expected = true   # an identifier followed by ':' would be a label
  case_expected = false   # a ':' would end a case/default clause

  heredoc_delimiter = nil # Regexp for the closing heredoc line, if inside one
  delimiter = nil         # opening character of the current dqstring
  modifier = nil          # pending 'b' prefix of a binary string literal

  until eos?

    match = nil
    kind = nil

    case states.last

    when :initial
      if scan RE::PHP_START
        kind = :inline_delimiter
        label_expected = true
        states << :php
      else
        # Everything up to the next PHP start (or the end) goes to the
        # HTML scanner instead of into +tokens+.
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
        @html_scanner.tokenize match unless match.empty?
        next
      end

    when :php
      if match = scan(/\s+/)
        tokens << [match, :space]
        next

      elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        kind = :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          # label_expected is true here and deliberately stays true.
          kind = :label
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :reserved
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)
            # 'b' directly before a quote: hold it back so it is emitted
            # as a :modifier inside the string group opened next.
            modifier = match
            next
          end
        end

      elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        kind = :float

      elsif scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        kind = :hex

      elsif scan(/\d+/)
        label_expected = false
        kind = :integer

      elsif scan(/'/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        kind = :delimiter
        states.push :sqstring

      elsif match = scan(/["`]/)
        tokens << [:open, :string]
        if modifier
          tokens << [modifier, :modifier]
          modifier = nil
        end
        delimiter = match
        kind = :delimiter
        states.push :dqstring

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        kind = Words::VARIABLE_KIND[match]

      elsif scan(/\{/)
        kind = :operator
        label_expected = true
        states.push :php

      elsif scan(/\}/)
        if states.size <= 2
          # Only the base :php state sits above :initial, so this brace has
          # no '{' recorded on the stack. That is common in templates
          # ("<?php } ?>"), because '?>' drops the nesting. Popping here would
          # leave PHP mode and send the rest of the code to the HTML scanner,
          # so keep the state and emit a plain operator.
          kind = :operator
          label_expected = true
        else
          states.pop
          if states.last.is_a?(::Array)
            # End of a "{$...}" interpolation: resume the suspended string.
            delimiter = states.last[1]
            states[-1] = states.last[0]
            tokens << [matched, :delimiter]
            tokens << [:close, :inline]
            next
          else
            kind = :operator
            label_expected = true
          end
        end

      elsif scan(/@/)
        label_expected = false
        kind = :exception

      elsif scan RE::PHP_END
        # Leaving PHP abandons every nested state. Close the groups those
        # states had opened (before the '?>' token itself is appended) and
        # forget string context that belonged to them, so the next PHP
        # section starts clean.
        close_open_groups tokens, states
        kind = :inline_delimiter
        states = [:initial]
        heredoc_delimiter = nil
        delimiter = nil

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        tokens << [:open, :string]
        warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        kind = :delimiter
        # Group 3 is the single-quoted form, which is scanned like a
        # single-quoted string; the other two forms allow interpolation.
        states.push(self[3] ? :sqstring : :dqstring)
        # The terminator must be followed by an optional ';' and end of line.
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        kind = :operator

      else
        getch
        kind = :error

      end

    when :sqstring
      if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        kind = :content
      elsif !heredoc_delimiter && scan(/'/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        if scan heredoc_delimiter
          # The newline belongs to the content; the terminator closes the group.
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        kind = :char
      elsif scan(/\\./m)
        # Backslash before any other character is plain content here.
        kind = :content
      elsif scan(/\\/)
        # Lone backslash at the very end of the input.
        kind = :error
      end

    when :dqstring
      if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        kind = :content
      elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
        tokens << [matched, :delimiter]
        tokens << [:close, :string]
        delimiter = nil
        label_expected = false
        states.pop
        next
      elsif heredoc_delimiter && match = scan(/\n/)
        kind = :content
        if scan heredoc_delimiter
          tokens << ["\n", :content]
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          heredoc_delimiter = nil
          label_expected = false
          states.pop
          next
        end
      elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        kind = :char
      elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif scan(/\\/)
        kind = :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        kind = :local_variable
        if check(/\[#{RE::IDENTIFIER}\]/o)
          # "$var[key]" - simple array access inside the string.
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/\[/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [scan(/\]/), :operator]
          tokens << [:close, :inline]
          next
        elsif check(/\[/)
          # Any other bracket form directly after the variable is an error.
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          kind = :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          # "$var->name" - simple property access inside the string.
          tokens << [:open, :inline]
          tokens << [match, :local_variable]
          tokens << [scan(/->/), :operator]
          tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
          tokens << [:close, :inline]
          next
        elsif check(/->/)
          match << scan(/->/)
          kind = :error
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # "{$": suspend the string (remembering its delimiter) and scan
          # PHP code until the matching '}'.
          kind = :delimiter
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php
          tokens << [:open, :inline]
        else
          kind = :string
        end
      elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
        kind = :local_variable
      elsif scan(/\$/)
        kind = :content
      end

    when :class_expected
      if scan(/\s+/)
        kind = :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :class
        states.pop
      else
        # Not a name after all: drop the expectation and rescan as PHP.
        states.pop
        next
      end

    when :function_expected
      if scan(/\s+/)
        kind = :space
      elsif scan(/&/)
        kind = :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        kind = :function
        states.pop
      else
        states.pop
        next
      end

    else
      raise_inspect 'Unknown state!', tokens, states
    end

    # Branches that only called scan/getch rely on +matched+ for the text.
    match ||= matched
    if $CODERAY_DEBUG && !kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, states
    end
    raise_inspect 'Empty token', tokens, states unless match

    tokens << [match, kind]

  end

  # Input may end inside a string or an interpolation; keep groups balanced.
  close_open_groups tokens, states

  tokens
end

# Appends the [:close, ...] markers for every group that the given state
# stack still has open, innermost first. +states+ is not modified.
#
# A string state (:sqstring / :dqstring) owns one open :string group. An
# Array entry is a string state suspended by "{$", which owns an open
# :inline group nested inside its :string group.
def close_open_groups tokens, states
  states.reverse_each do |state|
    if state.is_a?(::Array)
      tokens << [:close, :inline]
      state = state.first
    end
    tokens << [:close, :string] if state == :sqstring || state == :dqstring
  end
end