Highlighting PHP with semantic HTML

Forget about AddType application/x-httpd-php-source .phps or highlight_string(). This is way better: not only does the output validate under a strict doctype, it is also semantic markup that is completely stylable with CSS.

Inspired by Tom-Eric.

Note: some tokens from http://php.net/tokens are missing because I haven't updated this in a while, but you'll get the idea :)

<?php

/**
 * Converts PHP source code into HTML in which every token is wrapped in a
 * <code> (or <var>) element carrying a descriptive CSS class.
 *
 * Each PHP section (open tag up to close tag or end of input) is emitted
 * inside its own <pre class="php"> element; inline HTML outside PHP tags is
 * emitted as <code class="html">.
 */
class Highlighter
{
    /**
     * Snapshot of get_defined_functions() taken at construction time; the
     * 'internal' list decides which identifiers link to the PHP manual.
     * Kept public because the original `var` declaration was public.
     */
    public $defined_functions;

    /**
     * Snapshot of get_defined_constants() taken at construction time; used
     * to recognise constants and to show their value in a title attribute.
     * Kept public because the original `var` declaration was public.
     */
    public $defined_constants;

    /** When true, brackets, colons and semicolons get their own <code> wrapper. */
    public $highlightBrackets;

    /** When true, whitespace tokens are wrapped in <code class="whitespace">. */
    public $highlightWhiteSpace;

    /** Lazily built map of tokenizer id => CSS class (see buildTokenClasses()). */
    private $tokenClasses = null;

    /**
     * Captures the currently defined functions and constants and sets the
     * default options (brackets highlighted, whitespace left unwrapped).
     */
    public function __construct()
    {
        $this->defined_functions = get_defined_functions();
        $this->defined_constants = get_defined_constants();

        $this->highlightBrackets = true;
        $this->highlightWhiteSpace = false;
    }

    /**
     * Legacy PHP 4-style constructor name, kept so existing code that calls
     * it explicitly keeps working. PHP 8 no longer treats a method named
     * after the class as a constructor, which is why the real initialisation
     * lives in __construct().
     */
    public function Highlighter()
    {
        $this->__construct();
    }

    /**
     * Highlights a piece of PHP source.
     *
     * @param string $source PHP source code, including its opening tag.
     * @return string HTML markup for the source.
     */
    public function HighlightPHP($source)
    {
        if ($this->tokenClasses === null) {
            $this->tokenClasses = $this->buildTokenClasses();
        }

        $return = '';
        // Tracks whether a <pre> opened by a PHP open tag still needs closing.
        $preOpen = false;

        foreach (token_get_all($source) as $code) {
            // Single-character tokens arrive as plain strings, not arrays.
            if (!is_array($code)) {
                $return .= $this->highlightSymbol($code);
                continue;
            }

            $name = $code[0];
            $raw = $code[1];
            // Escaped exactly once here; no branch below may escape it again.
            $value = htmlentities($raw);

            switch ($name) {
                case T_OPEN_TAG:
                case T_OPEN_TAG_WITH_ECHO:
                    $return .= '<pre class="php"><code class="php-tag">' . $value . '</code>';
                    $preOpen = true;
                    break;

                case T_CLOSE_TAG:
                    $return .= '<code class="php-tag">' . $value . '</code></pre>';
                    $preOpen = false;
                    break;

                case T_STRING:
                    $return .= $this->highlightIdentifier($raw, $value);
                    break;

                case T_VARIABLE:
                    $return .= '<var>' . $value . '</var>';
                    break;

                case T_WHITESPACE:
                    /* Giving whitespace a wrapper is handy if you want to turn
                     * this off with css: code.whitespace { white-space: normal; }
                     */
                    if ($this->highlightWhiteSpace) {
                        $return .= '<code class="whitespace">' . $value . '</code>';
                    } else {
                        $return .= $value;
                    }
                    break;

                default:
                    if (isset($this->tokenClasses[$name])) {
                        $return .= '<code class="' . $this->tokenClasses[$name] . '">' . $value . '</code>';
                    } else {
                        // Unknown token: already escaped above, so emit as-is.
                        $return .= $value;
                    }
            }
        }

        // Source files commonly omit the final close tag; close the <pre>
        // ourselves so the generated markup stays well-formed.
        if ($preOpen) {
            $return .= '</pre>';
        }

        return $return;
    }

    /**
     * Wraps one of the single-character operators in a <code> element.
     *
     * @param string $code Raw (unescaped) token text.
     * @return string HTML for the token; unrecognised text is only escaped.
     */
    public function otherOperators($code)
    {
        switch ($code) {
            case '>':
            case '<':
                $return = '<code class="comparison operator">' . htmlspecialchars($code) . '</code>';
                break;
            case '+':
            case '-':
            case '*':
            case '/':
            case '%':
                $return = '<code class="arithmetic operator">' . htmlspecialchars($code) . '</code>';
                break;
            case '=':
                $return = '<code class="assignment operator">' . $code . '</code>';
                break;
            case '@':
                $return = '<code class="error-control operator">' . $code . '</code>';
                break;
            default:
                $return = htmlspecialchars($code);
        }
        return $return;
    }

    /**
     * Builds the tokenizer id => CSS class map.
     *
     * Tokens are listed by name and resolved through defined()/constant() so
     * that names missing from the running PHP version (for example
     * T_OLD_FUNCTION and T_ML_COMMENT on PHP 5 and later) are skipped instead
     * of raising an undefined-constant error.
     *
     * @return array Map of integer token id to CSS class string.
     */
    private function buildTokenClasses()
    {
        $groups = array(
            'numeric string' => array('T_NUM_STRING'),
            'constant' => array('T_LINE', 'T_FILE', 'T_FUNC_C', 'T_CLASS_C'),
            'html' => array('T_INLINE_HTML'),
            'integer' => array('T_LNUMBER', 'T_DNUMBER'),
            'encapsed' => array('T_CONSTANT_ENCAPSED_STRING'),
            'object operator' => array('T_OBJECT_OPERATOR', 'T_PAAMAYIM_NEKUDOTAYIM'),
            'construct' => array(
                'T_ECHO', 'T_PRINT', 'T_IF', 'T_ELSEIF', 'T_INCLUDE',
                'T_INCLUDE_ONCE', 'T_REQUIRE', 'T_REQUIRE_ONCE', 'T_ELSE',
                'T_FOR', 'T_SWITCH', 'T_WHILE', 'T_RETURN', 'T_ISSET',
                'T_UNSET', 'T_EMPTY', 'T_ARRAY', 'T_DO', 'T_DECLARE',
                'T_CONTINUE', 'T_NEW', 'T_CASE', 'T_FOREACH', 'T_CONST',
                'T_EXIT', 'T_DEFAULT', 'T_AS', 'T_BREAK', 'T_FUNCTION',
                'T_CLASS', 'T_EXTENDS', 'T_VAR', 'T_OLD_FUNCTION',
            ),
            'assignment operator' => array(
                'T_AND_EQUAL', 'T_CONCAT_EQUAL', 'T_DIV_EQUAL', 'T_MINUS_EQUAL',
                'T_MOD_EQUAL', 'T_MUL_EQUAL', 'T_OR_EQUAL', 'T_PLUS_EQUAL',
                'T_DOUBLE_ARROW', 'T_SL_EQUAL', 'T_INC', 'T_DEC',
                'T_SR_EQUAL', 'T_XOR_EQUAL',
            ),
            'bitwise operator' => array('T_SL', 'T_SR'),
            'logical operator' => array(
                'T_BOOLEAN_AND', 'T_BOOLEAN_OR', 'T_LOGICAL_AND',
                'T_LOGICAL_OR', 'T_LOGICAL_XOR',
            ),
            'comparison operator' => array(
                'T_IS_EQUAL', 'T_IS_GREATER_OR_EQUAL', 'T_IS_IDENTICAL',
                'T_IS_NOT_EQUAL', 'T_IS_NOT_IDENTICAL', 'T_IS_SMALLER_OR_EQUAL',
            ),
            'comment' => array('T_COMMENT', 'T_DOC_COMMENT', 'T_ML_COMMENT'),
        );

        $map = array();
        foreach ($groups as $class => $tokenNames) {
            foreach ($tokenNames as $tokenName) {
                if (defined($tokenName)) {
                    $map[constant($tokenName)] = $class;
                }
            }
        }
        return $map;
    }

    /**
     * Renders a T_STRING token: a manual link for internal functions, a
     * titled element for known constants, a plain "function" element otherwise.
     *
     * @param string $raw   Identifier exactly as it appears in the source.
     * @param string $value The same identifier, HTML-escaped.
     * @return string HTML for the identifier.
     */
    private function highlightIdentifier($raw, $value)
    {
        // Look up with the raw text; the escaped form is only for output.
        if (in_array($raw, $this->defined_functions['internal'], true)) {
            return '<code class="function"><a href="http://www.php.net/' . $value . '" title="PHP: ' . $value . ' - Manual">' . $value . '</a></code>';
        }

        if (array_key_exists($raw, $this->defined_constants)) {
            return '<code class="constant" title="Constant value: ' . $this->describeConstant($this->defined_constants[$raw]) . '">' . $value . '</code>';
        }

        return '<code class="function">' . $value . '</code>';
    }

    /**
     * Turns a constant's value into text that is safe inside a double-quoted
     * HTML attribute.
     *
     * @param mixed $constant Value taken from get_defined_constants().
     * @return string Escaped text; the type name for arrays, objects and resources.
     */
    private function describeConstant($constant)
    {
        if (is_scalar($constant)) {
            $text = (string) $constant;
        } elseif ($constant === null) {
            $text = '';
        } else {
            // Arrays, objects and resources have no useful string form here.
            $text = gettype($constant);
        }
        return htmlentities($text, ENT_QUOTES);
    }

    /**
     * Renders a single-character token, giving brackets, colons and
     * semicolons their own wrapper when $highlightBrackets is enabled.
     *
     * @param string $code Raw token text.
     * @return string HTML for the token.
     */
    private function highlightSymbol($code)
    {
        $punctuation = array(
            '{' => 'curly bracket',
            '}' => 'curly bracket',
            '[' => 'square bracket',
            ']' => 'square bracket',
            '(' => 'parenthese bracket',
            ')' => 'parenthese bracket',
            ':' => 'colon',
            ';' => 'semicolon',
        );

        if ($this->highlightBrackets && isset($punctuation[$code])) {
            return '<code class="' . $punctuation[$code] . '">' . $code . '</code>';
        }

        return $this->otherOperators($code);
    }
}

?>

Usage

Should be kinda obvious, but here goes for some of the Belgians out there (and of course for posterity) :-).

<?php
  // Highlight file.php with the default options and print the resulting HTML.
  $highlighter = new Highlighter();
  echo $highlighter->HighlightPHP(file_get_contents('file.php'));
?>

Or

<?php
  // Highlight file.php without wrapping brackets, colons and semicolons
  // in their own <code> elements.
  $highlighter = new Highlighter();
  $highlighter->highlightBrackets = false;
  echo $highlighter->HighlightPHP(file_get_contents('file.php'));
?>