将结构转换为 PHP 数组

Convert structure to PHP array

我们公司使用的可怕系统给我以下输出:

{
    party:"bases",
    number:"1",
    id:"xx_3039366",
    url:"systen01-ny.com",
    target:"_self",
    address:"Ch\u00e3o as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA",
    coordinate:{
        x:90.995262145996094,
        y:-1.3394836426
    },
    contactDetails:{
        id:"366",
        phone:"xxxxxx",
        mobile:"",
        fax:"xxxx 777 235",
        c2c:!0
    },
    parameters:"Flex Am\u00e1vel Silva,hal,,EN_30336,S,786657,1,0,",
    text:"Vila Nova de Loz C\u00f4a,os melhores vinhos, v\u00e1rias. Produtor/exportador/com\u00e9rcio",
    website:null,
    mail:"",
    listing:"paid",
    pCode:"64",
    name:"xpto Am\u00e1vel Costa",
    logo:{src:"http://ny.test.gif",
    altname:"xpto Am\u00e1vel Costa"},
    bookingUrl:"",
    ipUrl:"",
    ipLabel:"",
    customerId:"7657",
    addressId:"98760",
    combined:null,
    showReviews:!0
}

我想知道是否有办法把这段输出当作 JSON 那样转换为数组,或者转换成其他可以在 PHP 中处理的格式。json_decode 对它无效。

就像我说的那样,这里是您自己的 Json 对象解析器。

提醒一句,这类事情可能更像是艺术而不是科学,因此如果您的输入与示例中的不同,则可能会出现问题。鉴于样本量较小(1 个文档),我不保证它在该示例之外的功能。

我本想解释它是如何工作的,但恐怕凡夫俗子难以领会。

说真的,这很有趣,我难得享受了一次挑战。

<?php
/**
 * Tokenize $subject with the supplied token patterns and hand the token
 * stream to parseJsonTokens().
 *
 * Every entry of $tokens is wrapped in a named capture group and all groups
 * are joined into one case-insensitive alternation. Each match becomes a
 * token array with the keys 'content', 'type' and 'offset'.
 *
 * @param string $subject Text to tokenize.
 * @param array  $tokens  Map of token type name => regex fragment; earlier
 *                        entries win when several alternatives could match.
 *
 * @return mixed Whatever parseJsonTokens() returns for the token stream, or
 *               false when the combined pattern matches nothing.
 */
function parseJson($subject, $tokens)
{
    // One named group per token type, so a match can be traced back to its type.
    $alternatives = [];
    foreach ($tokens as $name => $expression) {
        $alternatives[] = "(?P<{$name}>{$expression})";
    }
    $regex = "/" . implode('|', $alternatives) . "/i";

    // Nothing matched (or the match failed): there is nothing to parse.
    if (!preg_match_all($regex, $subject, $found, PREG_OFFSET_CAPTURE)) {
        return false;
    }

    $names = array_keys($tokens);
    $stream = [];

    foreach (array_keys($found[0]) as $index) {
        $hit = [];
        // The first named group that took part in this match identifies the
        // token type; groups that did not take part report an offset of -1.
        foreach ($names as $kind) {
            $hit = $found[$kind][$index];
            if (is_array($hit) && $hit[1] != -1) {
                break;
            }
        }

        $stream[] = [
            'content' => $hit[0],
            'type' => $kind,
            'offset' => $hit[1]
        ];
    }

    return parseJsonTokens($stream);
}

/**
 * Build a PHP array from the tokens of one brace-delimited object.
 *
 * The stream is walked through the array's internal pointer, which is why it
 * is taken by reference: a nested object is parsed by a recursive call that
 * continues from the current position and leaves the pointer just past the
 * matching closing brace.
 *
 * Values are kept as strings (bare words such as numbers or `!0` are not
 * converted), `null` becomes PHP null, and nested objects become nested arrays.
 *
 * @param array $lexer_stream Tokens as produced by parseJson(): arrays with
 *                            the keys 'content', 'type' and 'offset'. The
 *                            pointer must be on the opening brace, optionally
 *                            preceded by whitespace tokens.
 *
 * @return array|null The parsed key => value pairs, or null when an error was
 *                    raised and a custom error handler resumed execution.
 */
function parseJsonTokens( array &$lexer_stream ){

    $result = [];

    // Whitespace in front of the object must not be mistaken for its opening
    // brace, so skip it first and only then consume the brace itself.
    while (($lead = current($lexer_stream)) && $lead['type'] === 'T_WHITESPACE') {
        next($lexer_stream);
    }
    next($lexer_stream); // advance past the opening brace

    $mode = 'key'; // items start in key mode ( key => value )

    $key = '';
    $value = '';

    while ($current = current($lexer_stream)) {
        $content = $current['content'];
        $type = $current['type'];

        switch ($type) {
            case 'T_WHITESPACE': // ignore whitespace
                next($lexer_stream);
                break;
            case 'T_STRING':
                // keys are always bare strings, but bare strings are not always keys
                if ($mode == 'key') {
                    $key .= $content;
                } else {
                    $value .= $content;
                }
                next($lexer_stream); // consume a token
                break;
            case 'T_COLON':
                $mode = 'value'; // a colon switches from key to value
                next($lexer_stream); // consume a token
                break;
            case 'T_ENCAP_STRING':
                // Quoted strings are always content. Remove exactly one quote
                // from each end: trim() would also eat an escaped quote that
                // happens to sit at the start or end of the value.
                $value .= preg_replace('/^"|"$/D', '', unicode_decode($content));
                next($lexer_stream); // consume a token
                break;
            case 'T_NULL':
                $value = null; // literal null
                next($lexer_stream); // consume a token
                break;
            case 'T_COMMA': // a comma ends an item
                // A comma with no pending key (e.g. a doubled comma) has nothing to store.
                if ($key !== '') {
                    $result[$key] = $value;
                }
                // reset for the next item
                $mode = 'key';
                $key = '';
                $value = '';
                next($lexer_stream); // consume a token
                break;
            case 'T_OPEN_BRACE': // start of a sub-block
                // The recursive call consumes everything up to and including
                // the matching closing brace, so no next() is needed here.
                $value = parseJsonTokens($lexer_stream);
                break;
            case 'T_CLOSE_BRACE': // end of this block
                // An empty object or a trailing comma leaves no pending key;
                // storing it would add a bogus '' => '' entry.
                if ($key !== '') {
                    $result[$key] = $value;
                }
                next($lexer_stream); // consume a token
                return $result;
            default:
                print_r($current);
                trigger_error("Unknown token $type value $content", E_USER_ERROR);
                // Only reached when a custom error handler resumes execution;
                // bail out instead of spinning on the same token forever.
                return null;
        }
    }

    // The stream ran out before this block saw its closing brace.
    trigger_error("Unclosed item: input ended in $mode mode before a closing brace", E_USER_ERROR);
    return null;
}

//@see 
/**
 * Callback for preg_replace_callback(): turn one captured group of hex digits
 * into the UTF-8 character it encodes.
 *
 * @param array $match Regex match; element 1 holds the hex digits of the code unit.
 *
 * @return string The result of converting those bytes from UCS-2BE to UTF-8.
 */
function replace_unicode_escape_sequence($match) {
    $hexDigits = $match[1];
    // Pack the hex digits into raw big-endian bytes before converting them.
    $rawBytes = pack('H*', $hexDigits);

    return mb_convert_encoding($rawBytes, 'UTF-8', 'UCS-2BE');
}

/**
 * Replace every \uXXXX escape (backslash, "u", four hex digits) in $str with
 * the character it encodes, using replace_unicode_escape_sequence() as the
 * callback.
 *
 * @param string $str Text that may contain \uXXXX escapes.
 *
 * @return string|null The decoded text, or null if preg_replace_callback() fails.
 */
function unicode_decode($str) {
    // In a single-quoted PHP literal '\\\\u' becomes the regex \\u: a literal
    // backslash followed by "u". With only '\\u' PCRE receives the escape \u,
    // which it does not support, so the pattern would not compile.
    return preg_replace_callback('/\\\\u([0-9a-f]{4})/i', 'replace_unicode_escape_sequence', $str);
}

// Sample input: a JavaScript-style object literal with unquoted keys, bare
// numbers and `!0` flags.
$str = '{
    party:"bases",
    number:"1",
    id:"xx_3039366",
    url:"systen01-ny.com",
    target:"_self",
    address:"Ch\u00e3o as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA",
    coordinate:{
        x:90.995262145996094,
        y:-1.3394836426
    },
    contactDetails:{
        id:"366",
        phone:"xxxxxx",
        mobile:"",
        fax:"xxxx 777 235",
        c2c:!0
    },
    parameters:"Flex Am\u00e1vel Silva,hal,,EN_30336,S,786657,1,0,",
    text:"Vila Nova de Loz C\u00f4a,os melhores vinhos, v\u00e1rias. Produtor/exportador/com\u00e9rcio",
    website:null,
    mail:"",
    listing:"paid",
    pCode:"64",
    name:"xpto Am\u00e1vel Costa",
    logo:{src:"http://ny.test.gif",
    altname:"xpto Am\u00e1vel Costa"},
    bookingUrl:"",
    ipUrl:"",
    ipLabel:"",
    customerId:"7657",
    addressId:"98760",
    combined:null,
    showReviews:!0
}';

// Token type => regex fragment. Order matters: parseJson() joins these into a
// single alternation, so earlier entries win (T_NULL must precede T_STRING,
// and T_UNKNOWN is the catch-all at the end).
$tokens = [
    'T_OPEN_BRACE'      => '\{',
    'T_CLOSE_BRACE'     => '\}',
    'T_NULL'            => '\bnull\b',
    // A double-quoted string ending at the first quote that is not preceded by
    // a backslash. The literal needs four backslashes so the regex receives
    // (?<!\\); with two the regex gets (?<!\) where \) is an escaped
    // parenthesis, leaving the group unclosed and the pattern uncompilable.
    'T_ENCAP_STRING'    => '\".*?(?<!\\\\)\"',
    'T_COLON'           => ':',
    'T_COMMA'           => ',',
    'T_STRING'          => '[-a-z0-9_.!]+',
    'T_WHITESPACE'      => '[\r\n\s\t]+',
    'T_UNKNOWN'         => '.+?'
];

var_export( parseJson($str, $tokens) );

输出(这是每个人都想要的)

array (
  'party' => 'bases',
  'number' => '1',
  'id' => 'xx_3039366',
  'url' => 'systen01-ny.com',
  'target' => '_self',
  'address' => 'Chão as Alminhas-Medas,Uteiros de Gatos e Fontes Longaq<br/>64320-761 ANHADOS LdA',
  'coordinate' => 
  array (
    'x' => '90.995262145996094',
    'y' => '-1.3394836426',
  ),
  'contactDetails' => 
  array (
    'id' => '366',
    'phone' => 'xxxxxx',
    'mobile' => '',
    'fax' => 'xxxx 777 235',
    'c2c' => '!0',
  ),
  'parameters' => 'Flex Amável Silva,hal,,EN_30336,S,786657,1,0,',
  'text' => 'Vila Nova de Loz Côa,os melhores vinhos, várias. Produtor/exportador/comércio',
  'website' => NULL,
  'mail' => '',
  'listing' => 'paid',
  'pCode' => '64',
  'name' => 'xpto Amável Costa',
  'logo' => 
  array (
    'src' => 'http://ny.test.gif',
    'altname' => 'xpto Amável Costa',
  ),
  'bookingUrl' => '',
  'ipUrl' => '',
  'ipLabel' => '',
  'customerId' => '7657',
  'addressId' => '98760',
  'combined' => NULL,
  'showReviews' => '!0',
)

你甚至可以在这里测试它(因为我是个好人)

http://sandbox.onlinephpfunctions.com/code/3c1dcafb59abbf19f7f3209724dbdd4a46546c57

在这个 SO 帖子的帮助下,我解决了 \u00e3 这类转义序列的编码问题,在此向他们表示感谢,因为我讨厌字符编码。

伙计,我只是喜欢一段漂亮的代码,嗯。

干杯!