Class CodeRay::Scanners::PHP
In: lib/coderay/scanners/php.rb
Parent: Scanner

Original by Stefan Walk.

Methods

Classes and Modules

Module CodeRay::Scanners::PHP::RE
Module CodeRay::Scanners::PHP::Words

Constants

KINDS_NOT_LOC = HTML::KINDS_NOT_LOC

Public Instance methods

[Source]

    # File lib/coderay/scanners/php.rb, line 18
    #
    # Resets this scanner by delegating to the parent implementation
    # (+super+) and then calling +reset+ on the nested HTML scanner held in
    # @html_scanner.
    #
    # NOTE(review): setup creates @html_scanner with :keep_state => true, which
    # is presumably why it has to be reset explicitly here - confirm against
    # the HTML scanner.
18:     def reset_instance
19:       super
20:       @html_scanner.reset
21:     end

[Source]

     # File lib/coderay/scanners/php.rb, line 227
     #
     # Tokenizes the scanner input (+string+), which may be plain PHP or HTML
     # with embedded PHP, appending [text, kind] pairs and [:open/:close, kind]
     # group markers to +tokens+.
     #
     # tokens::  token collection that receives the result (anything
     #           responding to <<); it is also the return value.
     # options:: not read anywhere in this method.
     #
     # The method is a state machine driven by the +states+ stack. The states
     # used below are :initial (HTML), :php, :sqstring, :dqstring,
     # :class_expected and :function_expected. While a "{$...}" interpolation
     # inside a double-quoted string is being scanned, the string's stack entry
     # is temporarily replaced by a [state, delimiter] Array.
     #
     # Raises (via raise_inspect) on an unknown state, on an empty token, and -
     # only when $CODERAY_DEBUG is set - on a token without a kind.
227:     def scan_tokens tokens, options
           # On Rubies whose strings carry an encoding, work on a binary
           # (ASCII-8BIT) copy; bytes that cannot be converted become '?'.
228:       if string.respond_to?(:encoding)
229:         unless string.encoding == Encoding::ASCII_8BIT
230:           self.string = string.encode Encoding::ASCII_8BIT,
231:             :invalid => :replace, :undef => :replace, :replace => '?'
232:         end
233:       end
234:       
           # Decide whether the input starts in HTML mode or directly in PHP mode.
235:       if check(RE::PHP_START) ||  # starts with <?
236:        (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
237:        exist?(RE::HTML_INDICATOR) ||
238:        check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
239:         # is HTML with embedded PHP, so start with HTML
240:         states = [:initial]
241:       else
242:         # is just PHP, so start with PHP surrounded by HTML
243:         states = [:initial, :php]
244:       end
245:       
           # label_expected: an identifier followed by a single ':' is tagged
           #                 :label only while this flag is true.
           # case_expected:  set after 'case'/'default'; the next operator
           #                 clears it, and a ':' re-enables label_expected.
246:       label_expected = true
247:       case_expected = false
248:       
           # heredoc_delimiter: nil, or a Regexp matching the closing
           #                    identifier of the heredoc being scanned.
           # delimiter:         opening quote character (" or `) of the
           #                    current :dqstring.
           # modifier:          pending 'b' binary-string prefix, emitted once
           #                    the string is opened.
249:       heredoc_delimiter = nil
250:       delimiter = nil
251:       modifier = nil
252:       
253:       until eos?
254:         
             # Each branch below either sets +kind+ (and optionally +match+)
             # for the common emit step after the case statement, or appends
             # its tokens itself and skips that step with +next+.
255:         match = nil
256:         kind = nil
257:         
258:         case states.last
259:         
260:         when :initial  # HTML
261:           if scan RE::PHP_START
262:             kind = :inline_delimiter
263:             label_expected = true
264:             states << :php
265:           else
                 # Everything up to the next PHP start tag (or the rest of the
                 # input) is handed to the nested HTML scanner; nothing is
                 # appended to +tokens+ here.
                 # NOTE(review): presumably the HTML scanner writes into the
                 # same token collection (see :tokens => @tokens in setup) -
                 # confirm.
266:             match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
267:             @html_scanner.tokenize match unless match.empty?
268:             next
269:           end
270:         
271:         when :php
272:           if match = scan(/\s+/)
273:             tokens << [match, :space]
274:             next
275:           
               # /* ... */ (possibly unterminated) or a // or # comment that
               # runs to the end of the line or to the PHP end tag.
276:           elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
277:             kind = :comment
278:           
279:           elsif match = scan(RE::IDENTIFIER)
280:             kind = Words::IDENT_KIND[match]
                 # A plain identifier followed by ':' (but not '::') where a
                 # label is allowed.
281:             if kind == :ident && label_expected && check(/:(?!:)/)
282:               kind = :label
283:               label_expected = true
284:             else
285:               label_expected = false
286:               if kind == :ident && match =~ /^[A-Z]/
287:                 kind = :constant
288:               elsif kind == :reserved
                     # 'class' and 'function' push a state that tags the
                     # following identifier as the class/function name.
289:                 case match
290:                 when 'class'
291:                   states << :class_expected
292:                 when 'function'
293:                   states << :function_expected
294:                 when 'case', 'default'
295:                   case_expected = true
296:                 end
297:               elsif match == 'b' && check(/['"]/)  # binary string literal
                     # Hold the prefix back; it is emitted as a :modifier token
                     # right after the string's :open marker.
298:                 modifier = match
299:                 next
300:               end
301:             end
302:           
303:           elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
304:             label_expected = false
305:             kind = :float
306:           
307:           elsif scan(/0x[0-9a-fA-F]+/)
308:             label_expected = false
309:             kind = :hex
310:           
311:           elsif scan(/\d+/)
312:             label_expected = false
313:             kind = :integer
314:           
315:           elsif scan(/'/)
316:             tokens << [:open, :string]
317:             if modifier
318:               tokens << [modifier, :modifier]
319:               modifier = nil
320:             end
321:             kind = :delimiter
322:             states.push :sqstring
323:           
324:           elsif match = scan(/["`]/)
325:             tokens << [:open, :string]
326:             if modifier
327:               tokens << [modifier, :modifier]
328:               modifier = nil
329:             end
                 # Remember which quote opened the string so that :dqstring
                 # knows which character closes it.
330:             delimiter = match
331:             kind = :delimiter
332:             states.push :dqstring
333:           
334:           elsif match = scan(RE::VARIABLE)
335:             label_expected = false
336:             kind = Words::VARIABLE_KIND[match]
337:           
338:           elsif scan(/\{/)
                 # A block opens a nested :php state so that the matching '}'
                 # can pop it again.
339:             kind = :operator
340:             label_expected = true
341:             states.push :php
342:           
343:           elsif scan(/\}/)
344:             if states.size == 1
345:               kind = :error
346:             else
347:               states.pop
                   # An Array on top of the stack means this '}' ends a
                   # "{$...}" interpolation: restore the string state and its
                   # delimiter, then close the inline group.
348:               if states.last.is_a?(::Array)
349:                 delimiter = states.last[1]
350:                 states[-1] = states.last[0]
351:                 tokens << [matched, :delimiter]
352:                 tokens << [:close, :inline]
353:                 next
354:               else
355:                 kind = :operator
356:                 label_expected = true
357:               end
358:             end
359:           
360:           elsif scan(/@/)
361:             label_expected = false
362:             kind = :exception
363:           
364:           elsif scan RE::PHP_END
                 # The PHP end tag discards all nested states and returns to
                 # HTML.
365:             kind = :inline_delimiter
366:             states = [:initial]
367:           
               # Heredoc start: <<<ID, <<<"ID" or <<<'ID'. The single-quoted
               # form (capture 3) is scanned as :sqstring, the others as
               # :dqstring.
368:           elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
369:             tokens << [:open, :string]
370:             warn 'heredoc in heredoc?' if heredoc_delimiter
371:             heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
372:             kind = :delimiter
373:             states.push self[3] ? :sqstring : :dqstring
                 # The closing identifier must be followed by an optional ';'
                 # and the end of the line.
374:             heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
375:           
376:           elsif match = scan(/#{RE::OPERATOR}/o)
377:             label_expected = match == ';'
378:             if case_expected
379:               label_expected = true if match == ':'
380:               case_expected = false
381:             end
382:             kind = :operator
383:           
384:           else
                 # Nothing matched: consume one character and flag it.
385:             getch
386:             kind = :error
387:           
388:           end
389:         
             # Single-quoted string, or a heredoc opened with <<<'ID'.
390:         when :sqstring
391:           if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
392:             kind = :content
393:           elsif !heredoc_delimiter && scan(/'/)
394:             tokens << [matched, :delimiter]
395:             tokens << [:close, :string]
396:             delimiter = nil
397:             label_expected = false
398:             states.pop
399:             next
400:           elsif heredoc_delimiter && match = scan(/\n/)
401:             kind = :content
                 # If the closing identifier follows the newline, emit the
                 # newline, the delimiter and the :close marker here and leave
                 # the string state.
402:             if scan heredoc_delimiter
403:               tokens << ["\n", :content]
404:               tokens << [matched, :delimiter]
405:               tokens << [:close, :string]
406:               heredoc_delimiter = nil
407:               label_expected = false
408:               states.pop
409:               next
410:             end
411:           elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
412:             kind = :char
413:           elsif scan(/\\./m)
                 # Any other backslash sequence is treated as plain content.
414:             kind = :content
415:           elsif scan(/\\/)
416:             kind = :error
417:           end
418:         
             # Double-quoted or backtick string, or a heredoc opened with
             # <<<ID / <<<"ID"; variables and "{$...}" are scanned inside it.
419:         when :dqstring
420:           if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
421:             kind = :content
422:           elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
423:             tokens << [matched, :delimiter]
424:             tokens << [:close, :string]
425:             delimiter = nil
426:             label_expected = false
427:             states.pop
428:             next
429:           elsif heredoc_delimiter && match = scan(/\n/)
430:             kind = :content
                 # Same end-of-heredoc handling as in :sqstring.
431:             if scan heredoc_delimiter
432:               tokens << ["\n", :content]
433:               tokens << [matched, :delimiter]
434:               tokens << [:close, :string]
435:               heredoc_delimiter = nil
436:               label_expected = false
437:               states.pop
438:               next
439:             end
               # Hex (\xH, \xHH) and octal (\O .. \OOO) escapes.
440:           elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
441:             kind = :char
               # Named escapes; an escaped quote character only counts for the
               # quote that opened the string.
442:           elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
443:             kind = :char
444:           elsif scan(/\\./m)
445:             kind = :content
446:           elsif scan(/\\/)
447:             kind = :error
448:           elsif match = scan(/#{RE::VARIABLE}/o)
449:             kind = :local_variable
                 # $var[ident] and $var->ident are emitted as an inline group
                 # of separate tokens; a '[' or '->' that does not fit those
                 # forms is appended to the variable and flagged :error.
450:             if check(/\[#{RE::IDENTIFIER}\]/o)
451:               tokens << [:open, :inline]
452:               tokens << [match, :local_variable]
453:               tokens << [scan(/\[/), :operator]
454:               tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
455:               tokens << [scan(/\]/), :operator]
456:               tokens << [:close, :inline]
457:               next
458:             elsif check(/\[/)
459:               match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
460:               kind = :error
461:             elsif check(/->#{RE::IDENTIFIER}/o)
462:               tokens << [:open, :inline]
463:               tokens << [match, :local_variable]
464:               tokens << [scan(/->/), :operator]
465:               tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
466:               tokens << [:close, :inline]
467:               next
468:             elsif check(/->/)
469:               match << scan(/->/)
470:               kind = :error
471:             end
472:           elsif match = scan(/\{/)
473:             if check(/\$/)
                   # "{$" starts an interpolation: replace the string's stack
                   # entry by [state, delimiter] so that the matching '}' in
                   # the :php state can restore both, then scan PHP code.
474:               kind = :delimiter
475:               states[-1] = [states.last, delimiter]
476:               delimiter = nil
477:               states.push :php
478:               tokens << [:open, :inline]
479:             else
480:               kind = :string
481:             end
482:           elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
483:             kind = :local_variable
484:           elsif scan(/\$/)
485:             kind = :content
486:           end
487:         
             # Entered after the 'class' keyword: the next identifier is
             # tagged :class.
488:         when :class_expected
489:           if scan(/\s+/)
490:             kind = :space
491:           elsif match = scan(/#{RE::IDENTIFIER}/o)
492:             kind = :class
493:             states.pop
494:           else
                 # No identifier follows: drop the state without consuming
                 # input and rescan in the previous state.
495:             states.pop
496:             next
497:           end
498:         
             # Entered after the 'function' keyword: an optional '&', then
             # the next identifier is tagged :function.
499:         when :function_expected
500:           if scan(/\s+/)
501:             kind = :space
502:           elsif scan(/&/)
503:             kind = :operator
504:           elsif match = scan(/#{RE::IDENTIFIER}/o)
505:             kind = :function
506:             states.pop
507:           else
508:             states.pop
509:             next
510:           end
511:         
512:         else
513:           raise_inspect 'Unknown state!', tokens, states
514:         end
515:         
             # Common emit step: branches that did not assign +match+ use the
             # text of the most recent scan.
516:         match ||= matched
517:         if $CODERAY_DEBUG and not kind
518:           raise_inspect 'Error token %p in line %d' %
519:             [[match, kind], line], tokens, states
520:         end
521:         raise_inspect 'Empty token', tokens, states unless match
522:         
523:         tokens << [match, kind]
524:         
525:       end
526:       
527:       tokens
528:     end

[Source]

    # File lib/coderay/scanners/php.rb, line 14
    #
    # Creates the nested HTML scanner and stores it in @html_scanner; the
    # other methods of this class call +tokenize+ and +reset+ on it. The
    # scanner is given this scanner's @tokens through the :tokens option.
    #
    # NOTE(review): :keep_tokens and :keep_state presumably make repeated
    # tokenize calls append to that shared collection and continue from the
    # previous HTML state - confirm against the HTML scanner.
14:     def setup
15:       @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
16:     end

[Validate]