% loadjson.m (18 KB)
  function data = loadjson(fname,varargin)
  %
  % data=loadjson(fname,opt)
  %    or
  % data=loadjson(fname,'param1',value1,'param2',value2,...)
  %
  % parse a JSON (JavaScript Object Notation) file or string
  %
  % authors:Qianqian Fang (fangq<at> nmr.mgh.harvard.edu)
  % created on 2011/09/09, including previous works from
  %
  %         Nedialko Krouchev: http://www.mathworks.com/matlabcentral/fileexchange/25713
  %            created on 2009/11/02
  %         François Glineur: http://www.mathworks.com/matlabcentral/fileexchange/23393
  %            created on  2009/03/22
  %         Joel Feenstra:
  %         http://www.mathworks.com/matlabcentral/fileexchange/20565
  %            created on 2008/07/03
  %
  % $Id: loadjson.m 460 2015-01-03 00:30:45Z fangq $
  %
  % input:
  %      fname: input file name, if fname contains "{}" or "[]", fname
  %             will be interpreted as a JSON string
  %      opt: a struct to store parsing options, opt can be replaced by
  %           a list of ('param',value) pairs - the param string is equivalent
  %           to a field in opt. opt can have the following
  %           fields (first in [.|.] is the default)
  %
  %           opt.SimplifyCell [0|1]: if set to 1, loadjson will call cell2mat
  %                         for each element of the JSON data, and group
  %                         arrays based on the cell2mat rules.
  %           opt.FastArrayParser [1|0 or integer]: if set to 1, use a
  %                         speed-optimized array parser when loading an
  %                         array object. The fast array parser may
  %                         collapse block arrays into a single large
  %                         array similar to rules defined in cell2mat; 0 to
  %                         use a legacy parser; if set to a larger-than-1
  %                         value, this option will specify the minimum
  %                         dimension to enable the fast array parser. For
  %                         example, if the input is a 3D array, setting
  %                         FastArrayParser to 1 will return a 3D array;
  %                         setting to 2 will return a cell array of 2D
  %                         arrays; setting to 3 will return to a 2D cell
  %                         array of 1D vectors; setting to 4 will return a
  %                         3D cell array.
  %           opt.ShowProgress [0|1]: if set to 1, loadjson displays a progress bar.
  %
  % output:
  %      data: the decoded data; JSON objects {...} are returned as structs
  %            and JSON arrays [...] as numeric arrays or cell arrays. When
  %            the input holds several top-level objects/arrays they are
  %            returned as a cell array, one element per top-level item.
  %            Input with no content (e.g. an empty file) returns {}.
  %
  % examples:
  %      dat=loadjson('{"obj":{"string":"value","array":[1,2,3]}}')
  %      dat=loadjson(['examples' filesep 'example1.json'])
  %      dat=loadjson(['examples' filesep 'example1.json'],'SimplifyCell',1)
  %
  % license:
  %     BSD, see LICENSE_BSD.txt files for details
  %
  % -- this function is part of JSONLab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
  %

  global pos inStr len esc index_esc len_esc isoct arraytoken

  % Decide whether fname is inline JSON text or a path to a file.
  if(regexp(fname,'[\{\}\]\[]','once'))
      string=fname;
  elseif(exist(fname,'file'))
      fid = fopen(fname,'rb');
      if(fid<0)
          % exist(...,'file') is also true for folders and unreadable files
          error('cannot open input file');
      end
      string = fread(fid,inf,'uint8=>char')';
      fclose(fid);
  else
      error('input file does not exist');
  end

  % Parser state shared with the sub-functions through globals.
  pos = 1; len = length(string); inStr = string;
  isoct=exist('OCTAVE_VERSION','builtin');

  % Positions of brackets and quotes, used by matching_bracket.
  arraytoken=find(inStr=='[' | inStr==']' | inStr=='"');
  % Blank out escaped backslashes with TWO spaces: the replacement must have
  % the same length as the match, otherwise every index found in jstr below
  % would be shifted relative to inStr.
  jstr=regexprep(inStr,'\\\\','  ');
  escquote=regexp(jstr,'\\"');
  arraytoken=sort([arraytoken escquote]);

  % String delimiters and escape chars identified to improve speed:
  esc = find(inStr=='"' | inStr=='\' ); % comparable to: regexp(inStr, '["\\]');
  index_esc = 1; len_esc = length(esc);

  opt=varargin2struct(varargin{:});

  if(jsonopt('ShowProgress',0,opt)==1)
      opt.progressbar_=waitbar(0,'loading ...');
  end

  data={};        % stays empty when the input holds no JSON text at all
  jsoncount=1;
  try
      while pos <= len
          switch(next_char)
              case '{'
                  data{jsoncount} = parse_object(opt);
              case '['
                  data{jsoncount} = parse_array(opt);
              otherwise
                  error_pos('Outer level structure must be an object or an array');
          end
          jsoncount=jsoncount+1;
      end % while

      jsoncount=length(data);
      if(jsoncount==1 && iscell(data))
          data=data{1};
      end

      if(~isempty(data))
          if(isstruct(data)) % data can be a struct array
              data=jstruct2array(data);
          elseif(iscell(data))
              data=jcell2array(data);
          end
      end
  catch
      % Do not leave the progress window behind when parsing fails.
      err=lasterror;
      if(isfield(opt,'progressbar_') && ishandle(opt.progressbar_))
          close(opt.progressbar_);
      end
      rethrow(err);
  end

  if(isfield(opt,'progressbar_'))
      close(opt.progressbar_);
  end
  112. %%
  function newdata=jcell2array(data)
  % Post-process the elements of a cell array: struct elements are passed to
  % jstruct2array, nested cell elements are processed recursively, and every
  % other element is returned untouched.
  newdata=data;
  count=length(data);
  k=0;
  while(k<count)
      k=k+1;
      item=data{k};
      if(isstruct(item))
          newdata{k}=jstruct2array(item);
      elseif(iscell(item))
          newdata{k}=jcell2array(item);
      end
  end
  123. %%-------------------------------------------------------------------------
  function newdata=jstruct2array(data)
  % Convert structs that carry the JSONLab array annotation fields back into
  % MATLAB arrays.
  %
  % input:
  %      data: a struct or struct array produced by the parser
  % output:
  %      newdata: if data has both the fields x0x5F_ArrayType_ and
  %           x0x5F_ArrayData_, the decoded array (a cell array with one
  %           decoded array per element when data is a struct array);
  %           otherwise data itself, with nested struct fields processed
  %           recursively.
  %
  % The optional fields x0x5F_ArraySize_, x0x5F_ArrayIsComplex_ and
  % x0x5F_ArrayIsSparse_ control reshaping, complex and sparse assembly.
  % Field names are tested exactly (isfield), so a field that merely starts
  % with one of the annotation names does not trigger the conversion.
  fn=fieldnames(data);
  newdata=data;
  len=length(data);
  for i=1:length(fn) % depth-first
      for j=1:len
          child=data(j).(fn{i});
          if(isstruct(child))
              newdata(j).(fn{i})=jstruct2array(child);
          end
      end
  end

  if(~(isfield(data,'x0x5F_ArrayType_') && isfield(data,'x0x5F_ArrayData_')))
      return;
  end

  hascpx=isfield(data,'x0x5F_ArrayIsComplex_');
  hassparse=isfield(data,'x0x5F_ArrayIsSparse_');
  hassize=isfield(data,'x0x5F_ArraySize_');

  newdata=cell(len,1);
  for j=1:len
      ndata=cast(data(j).x0x5F_ArrayData_,data(j).x0x5F_ArrayType_);

      iscpx=0;
      if(hascpx)
          if(data(j).x0x5F_ArrayIsComplex_)
              iscpx=1;
          end
      end

      % The sparse branch is taken only when the flag is present AND true; a
      % false flag must still fall through to the dense reshape below.
      issp=0;
      if(hassparse)
          if(data(j).x0x5F_ArrayIsSparse_)
              issp=1;
          end
      end

      if(issp)
          if(hassize)
              dim=data(j).x0x5F_ArraySize_;
              % last two columns hold real/imaginary parts of the values
              if(iscpx && size(ndata,2)==4-any(dim==1))
                  ndata(:,end-1)=complex(ndata(:,end-1),ndata(:,end));
              end
              if isempty(ndata)
                  % All-zeros sparse
                  ndata=sparse(dim(1),prod(dim(2:end)));
              elseif dim(1)==1
                  % Sparse row vector
                  ndata=sparse(1,ndata(:,1),ndata(:,2),dim(1),prod(dim(2:end)));
              elseif dim(2)==1
                  % Sparse column vector
                  ndata=sparse(ndata(:,1),1,ndata(:,2),dim(1),prod(dim(2:end)));
              else
                  % Generic sparse array.
                  ndata=sparse(ndata(:,1),ndata(:,2),ndata(:,3),dim(1),prod(dim(2:end)));
              end
          else
              if(iscpx && size(ndata,2)==4)
                  ndata(:,3)=complex(ndata(:,3),ndata(:,4));
              end
              ndata=sparse(ndata(:,1),ndata(:,2),ndata(:,3));
          end
      elseif(hassize)
          if(iscpx && size(ndata,2)==2)
              ndata=complex(ndata(:,1),ndata(:,2));
          end
          ndata=reshape(ndata(:),data(j).x0x5F_ArraySize_);
      end
      newdata{j}=ndata;
  end
  if(len==1)
      newdata=newdata{1};
  end
  184. %%-------------------------------------------------------------------------
  function object = parse_object(varargin)
  % Parse a JSON object starting at the current position.
  %
  % input:
  %      varargin: parsing options, forwarded unchanged to parseStr and
  %                parse_value
  % output:
  %      object: a struct with one field per name/value pair (names are
  %              sanitized by valid_field); [] when the object is empty
  %
  % Raises a JSONparser error through error_pos/parse_char on malformed input.
  parse_char('{');
  object = [];
  if next_char ~= '}'
      while 1
          str = parseStr(varargin{:});
          if isempty(str)
              error_pos('Name of value at position %d cannot be empty');
          end
          parse_char(':');
          val = parse_value(varargin{:});
          % dynamic field name: no need to build and eval a code string
          object.(valid_field(str)) = val;
          if next_char == '}'
              break;
          end
          parse_char(',');
      end
  end
  parse_char('}');
  204. %%-------------------------------------------------------------------------
  function object = parse_array(varargin) % JSON array is written in row-major order
  % Parse a JSON array starting at the current position.
  %
  % input:
  %      varargin: parsing options; this function reads 'FastArrayParser',
  %                'SimplifyCell', 'SimplifyCellArray', 'progressbar_' and
  %                the internal 'JSONLAB_ArrayDepth_' via jsonopt
  % output:
  %      object: a numeric array when one of the fast paths applies,
  %              otherwise a cell array with one element per JSON value
  %
  % Strategy, in order:
  %   1. purely numeric 1-D array          -> sscanf
  %   2. purely numeric, rectangular 2-D   -> sscanf + reshape
  %   3. anything eval can turn into an array
  %   4. element-by-element parsing (fallback when 3 throws)
  % Paths 1 and 2 are only taken when every element was consumed by sscanf;
  % otherwise arrays such as [1,{}] or ragged arrays such as [[1,2],[3]]
  % would be truncated or mis-shaped.
  global pos inStr isoct
      parse_char('[');
      object = cell(0, 1);
      dim2=[];
      arraydepth=jsonopt('JSONLAB_ArrayDepth_',1,varargin{:});
      pbar=jsonopt('progressbar_',-1,varargin{:});
      fastparser=jsonopt('FastArrayParser',1,varargin{:});

      if next_char ~= ']'
          if(fastparser>=1 && arraydepth>=fastparser)
              [endpos, e1l, e1r]=matching_bracket(inStr,pos);
              arraystr=['[' inStr(pos:endpos)];
              arraystr=regexprep(arraystr,'"_NaN_"','NaN');
              arraystr=regexprep(arraystr,'"([-+]*)_Inf_"','$1Inf');
              arraystr(arraystr==sprintf('\n'))=[];
              arraystr(arraystr==sprintf('\r'))=[];
              %arraystr=regexprep(arraystr,'\s*,',','); % this is slow,sometimes needed
              if(~isempty(e1l) && ~isempty(e1r)) % the array is in 2D or higher D
                  astr=inStr((e1l+1):(e1r-1));
                  astr=regexprep(astr,'"_NaN_"','NaN');
                  astr=regexprep(astr,'"([-+]*)_Inf_"','$1Inf');
                  astr(astr==sprintf('\n'))=[];
                  astr(astr==sprintf('\r'))=[];
                  astr(astr==' ')='';
                  if(isempty(find(astr=='[', 1))) % array is 2D
                      % number of columns, taken from the first row
                      dim2=length(sscanf(astr,'%f,',[1 inf]));
                  end
              else % array is 1D
                  astr=arraystr(2:end-1);
                  astr(astr==' ')='';
                  [obj, count, errmsg, nextidx]=sscanf(astr,'%f,',[1,inf]);
                  % accept only if one number was read per comma-separated slot
                  if(nextidx>=length(astr)-1 && numel(obj)==sum(astr==',')+1)
                      object=obj;
                      pos=endpos;
                      parse_char(']');
                      return;
                  end
              end
              if(~isempty(dim2))
                  astr=arraystr;
                  astr(isspace(astr))='';
                  % only a rectangular [[..],[..],...] layout may be reshaped
                  if(is_uniform_2d(astr,dim2))
                      astr(astr=='[' | astr==']')='';
                      [obj, count, errmsg, nextidx]=sscanf(astr,'%f,',inf);
                      if(nextidx>=length(astr)-1 && numel(obj)==sum(astr==',')+1)
                          object=reshape(obj,dim2,numel(obj)/dim2)';
                          pos=endpos;
                          parse_char(']');
                          if(pbar>0)
                              waitbar(pos/length(inStr),pbar,'loading ...');
                          end
                          return;
                      end
                  end
              end
              arraystr=regexprep(arraystr,'\]\s*,','];');
          else
              % deliberately invalid: forces the element-wise parser below
              arraystr='[';
          end
          try
             if(isoct && regexp(arraystr,'"','once'))
                  error('Octave eval can produce empty cells for JSON-like input');
             end
             % NOTE(review): eval executes text taken from the JSON input. Do
             % not feed untrusted data through this path; set FastArrayParser
             % to 0 to skip it.
             object=eval(arraystr);
             pos=endpos;
          catch
           % eval failed (or was skipped): parse the elements one by one
           while 1
              newopt=varargin2struct(varargin{:},'JSONLAB_ArrayDepth_',arraydepth+1);
              val = parse_value(newopt);
              object{end+1} = val;
              if next_char == ']'
                  break;
              end
              parse_char(',');
           end
          end
      end
      if(jsonopt('SimplifyCell',0,varargin{:})==1)
        try
          oldobj=object;
          object=cell2mat(object')';
          if(iscell(oldobj) && isstruct(object) && numel(object)>1 && jsonopt('SimplifyCellArray',1,varargin{:})==0)
              object=oldobj;
          elseif(size(object,1)>1 && ndims(object)==2)
              object=object';
          end
        catch
          % best effort: keep the cell array when cell2mat cannot merge it
        end
      end
      parse_char(']');

      if(pbar>0)
          waitbar(pos/length(inStr),pbar,'loading ...');
      end
  %%-------------------------------------------------------------------------
  function tf = is_uniform_2d(s, ncol)
  % True when the whitespace-free text s has the exact layout
  % '[[r1],[r2],...]' with no deeper nesting and every row holding ncol
  % comma-separated entries.
  tf = false;
  n = numel(s);
  if(ncol < 1 || n < 4 || ~strcmp(s(1:2),'[[') || ~strcmp(s(n-1:n),']]'))
      return;
  end
  lb = find(s=='[');
  rb = find(s==']');
  nrow = numel(lb)-1;
  if(numel(rb) ~= nrow+1)
      return;
  end
  rowstart = lb(2:end);
  rowend = rb(1:end-1);
  if(any(rowend < rowstart))
      return;
  end
  if(nrow > 1)
      % consecutive rows must be separated by exactly one comma
      gap = rowend(1:end-1)+1;
      if(any(rowstart(2:end) ~= gap+1) || any(s(gap) ~= ','))
          return;
      end
  end
  cc = cumsum(s==',');
  tf = all(cc(rowend)-cc(rowstart) == ncol-1);
  296. %%-------------------------------------------------------------------------
  function parse_char(c)
  % Consume the character c at the current position (after skipping leading
  % white space) together with any white space that follows it; report a
  % parse error through error_pos when c is not the next character.
  global pos inStr len
  skip_whitespace;
  if pos <= len && inStr(pos) == c
      pos = pos + 1;
      skip_whitespace;
  else
      error_pos(sprintf('Expected %c at position %%d', c));
  end
  306. %%-------------------------------------------------------------------------
  function c = next_char
  % Return the next non-white-space character without consuming it, or []
  % when the end of the input has been reached. The position is advanced past
  % any leading white space.
  global pos inStr len
  skip_whitespace;
  c = [];
  if pos <= len
      c = inStr(pos);
  end
  315. %%-------------------------------------------------------------------------
  function skip_whitespace
  % Advance the global read position past any run of white-space characters,
  % stopping at the first non-space character or just past the end of input.
  global pos inStr len
  while true
      if pos > len
          break;
      end
      if ~isspace(inStr(pos))
          break;
      end
      pos = pos + 1;
  end
  321. %%-------------------------------------------------------------------------
  function str = parseStr(varargin)
  % Parse a double-quoted JSON string starting at the current position.
  %
  % output:
  %      str: the decoded text. The escapes \" \\ \/ \b \f \n \r \t are
  %           translated; \uXXXX sequences are copied through verbatim (six
  %           characters). The special strings "_Inf_", "-_Inf_" and "_NaN_"
  %           are returned as the numbers Inf, -Inf and NaN.
  %
  % Uses the pre-computed global list esc (positions of every '"' and '\')
  % to jump from one delimiter to the next instead of scanning each
  % character. Raises a JSONparser error through error_pos when the opening
  % quote is missing or the input ends inside the string.
  global pos inStr len esc index_esc len_esc
      if pos > len || inStr(pos) ~= '"'
          error_pos('String starting with " expected at position %d');
      else
          pos = pos + 1;
      end
      str = '';
      while pos <= len
          % move to the first delimiter at or after the current position
          while index_esc <= len_esc && esc(index_esc) < pos
              index_esc = index_esc + 1;
          end
          if index_esc > len_esc
              % no delimiter left: the string is unterminated
              str = [str inStr(pos:len)];
              pos = len + 1;
              break;
          else
              str = [str inStr(pos:esc(index_esc)-1)];
              pos = esc(index_esc);
          end
          nstr = length(str);
          switch inStr(pos)
              case '"'
                  pos = pos + 1;
                  if(~isempty(str))
                      if(strcmp(str,'_Inf_'))
                          str=Inf;
                      elseif(strcmp(str,'-_Inf_'))
                          str=-Inf;
                      elseif(strcmp(str,'_NaN_'))
                          str=NaN;
                      end
                  end
                  return;
              case '\'
                  if pos+1 > len
                      error_pos('End of file reached right after escape character');
                  end
                  pos = pos + 1;
                  switch inStr(pos)
                      case {'"' '\' '/'}
                          str(nstr+1) = inStr(pos);
                          pos = pos + 1;
                      case {'b' 'f' 'n' 'r' 't'}
                          str(nstr+1) = sprintf(['\' inStr(pos)]);
                          pos = pos + 1;
                      case 'u'
                          if pos+4 > len
                              error_pos('End of file reached in escaped unicode character');
                          end
                          % kept as the literal text \uXXXX
                          str(nstr+(1:6)) = inStr(pos-1:pos+4);
                          pos = pos + 5;
                  end
              otherwise % should never happen: esc only lists '"' and '\'
                  str(nstr+1) = inStr(pos);
                  pos = pos + 1;
          end
      end
      error_pos('End of file while expecting end of inStr');
  381. %%-------------------------------------------------------------------------
  function num = parse_number(varargin)
  % Read one number starting at the current position and advance the position
  % past it.
  %
  % output:
  %      num: the value read by sscanf with '%f'
  %
  % On Octave (global isoct non-zero) the extent of the number is taken from a
  % regular expression for the JSON number grammar; otherwise it is taken from
  % the next-index output of sscanf. Both branches raise a JSONparser error
  % through error_pos when no number can be read, so the position is never
  % left in an invalid state.
  global pos inStr len isoct
      currstr=inStr(pos:end);
      if(isoct~=0)
          numend=regexp(currstr,'^\s*-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+\-]?\d+)?','end');
          num = sscanf(currstr, '%f', 1);
          if(isempty(numend) || isempty(num))
              error_pos('Error reading number at position %d');
          end
          delta=numend+1;
      else
          [num, count, err, delta] = sscanf(currstr, '%f', 1);
          if ~isempty(err)
              error_pos('Error reading number at position %d');
          end
      end
      pos = pos + delta-1;
  397. %%-------------------------------------------------------------------------
  function val = parse_value(varargin)
  % Parse one JSON value at the current position and dispatch on its first
  % character.
  %
  % input:
  %      varargin: parsing options, forwarded to the specialised parsers
  % output:
  %      val: string, array, struct or number; the literals true/false are
  %           returned as the numbers 1/0 and null as []. An empty object is
  %           returned as a struct with no fields.
  %
  % Raises a JSONparser error through error_pos when no value starts at the
  % current position, including when the input ends where a value is expected.
  global pos inStr len

      pbar=jsonopt('progressbar_',-1,varargin{:});
      if(pbar>0)
          waitbar(pos/len,pbar,'loading ...');
      end

      if(pos>len)
          % truncated input such as '{"a":' - report it as a parse error
          error_pos('Value expected at position %d');
      end

      switch(inStr(pos))
          case '"'
              val = parseStr(varargin{:});
              return;
          case '['
              val = parse_array(varargin{:});
              return;
          case '{'
              val = parse_object(varargin{:});
              if isstruct(val)
                  % annotated array objects are decoded immediately
                  if(isfield(val,'x0x5F_ArrayType_'))
                      val=jstruct2array(val);
                  end
              elseif isempty(val)
                  val = struct;
              end
              return;
          case {'-','0','1','2','3','4','5','6','7','8','9'}
              val = parse_number(varargin{:});
              return;
          case 't'
              if pos+3 <= len && strcmpi(inStr(pos:pos+3), 'true')
                  val = 1;   % numeric, not logical (kept for compatibility)
                  pos = pos + 4;
                  return;
              end
          case 'f'
              if pos+4 <= len && strcmpi(inStr(pos:pos+4), 'false')
                  val = 0;   % numeric, not logical (kept for compatibility)
                  pos = pos + 5;
                  return;
              end
          case 'n'
              if pos+3 <= len && strcmpi(inStr(pos:pos+3), 'null')
                  val = [];
                  pos = pos + 4;
                  return;
              end
      end
      error_pos('Value expected at position %d');
  445. %%-------------------------------------------------------------------------
  function error_pos(msg)
  % Raise a parse error. msg is used as an sprintf format that receives the
  % current position; the message is followed by an excerpt of the input with
  % the marker <error> inserted at the current position (up to 15 characters
  % before it and 20 after it).
  global pos inStr len
      window = [pos-15, pos-1, pos, pos+20];
      window = max(min(window,len),1);      % clamp to the valid index range
      if window(2) == window(3)
          % nothing to display after the marker
          window(3) = window(2);
          window(4) = window(2)-1;
      end
      before = inStr(window(1):window(2));
      after = inStr(window(3):window(4));
      report = [sprintf(msg, pos) ': ' before '<error>' after];
      error( ['JSONparser:invalidFormat: ' report] );
  455. %%-------------------------------------------------------------------------
  function str = valid_field(str)
  % Turn an arbitrary JSON name into a valid struct field name.
  %
  % A leading character that is not a letter is replaced by the prefix
  % "x0x[Hex code]_"; every other character outside [0-9A-Za-z_] is replaced
  % by "_0x[Hex code]_". MATLAB performs the substitution with dynamic
  % regexprep expressions, Octave (global isoct non-zero) with explicit
  % string assembly.
  global isoct
      leading=regexp(str,'^[^A-Za-z]','once');
      if(~isempty(leading))
          if(~isoct)
              str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
          else
              str=sprintf('x0x%X_%s',char(str(1)),str(2:end));
          end
      end

      bad=regexp(str,'[^0-9A-Za-z_]');
      if(isempty(bad))
          return;
      end

      if(~isoct)
          str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
      else
          % copy the text between invalid characters and encode each of them
          encoded='';
          prev=0;
          for k=1:length(bad)
              encoded=[encoded str(prev+1:bad(k)-1) sprintf('_0x%X_',str(bad(k)))];
              prev=bad(k);
          end
          str=[encoded str(prev+1:end)];
      end
  487. %%-------------------------------------------------------------------------
  function endpos = matching_quote(str,pos)
  % Return the index of the first double quote in str at or after pos that is
  % not immediately preceded by a backslash.
  %
  % input:
  %      str: text to search
  %      pos: index at which the search starts
  % output:
  %      endpos: index of the closing quote
  %
  % Raises 'unmatched quotation mark' when no such quote exists. The scan
  % includes the last character of str.
  len=length(str);
  while(pos<=len)
      if(str(pos)=='"' && ~(pos>1 && str(pos-1)=='\'))
          endpos=pos;
          return;
      end
      pos=pos+1;
  end
  error('unmatched quotation mark');
  500. %%-------------------------------------------------------------------------
  function [endpos, e1l, e1r, maxlevel] = matching_bracket(str,pos)
  % Locate the ']' that closes the array whose content starts at pos.
  %
  % input:
  %      str: the full JSON text
  %      pos: index just inside the opening '[' of the array
  % output:
  %      endpos:   index in str of the matching ']'
  %      e1l, e1r: indices of the first '[' and the first ']' met during the
  %                scan ([] when none was met)
  %      maxlevel: deepest bracket nesting level reached (the array itself
  %                counts as level 1)
  %
  % Only the positions listed in the global arraytoken are visited; quoted
  % sections are skipped with matching_quote. Raises 'unmatched "]"' when the
  % array is never closed.
  global arraytoken
  depth=1;
  maxlevel=depth;
  endpos=0;
  e1l=[];
  e1r=[];
  bpos=arraytoken(arraytoken>=pos);
  tokens=str(bpos);
  ntok=length(tokens);
  k=1;
  while(k<=ntok)
      switch(tokens(k))
          case ']'
              depth=depth-1;
              if(isempty(e1r))
                  e1r=bpos(k);
              end
              if(depth==0)
                  endpos=bpos(k);
                  return
              end
          case '['
              if(isempty(e1l))
                  e1l=bpos(k);
              end
              depth=depth+1;
              maxlevel=max(maxlevel,depth);
          case '"'
              % jump to the closing quote of this string
              k=matching_quote(tokens,k+1);
      end
      k=k+1;
  end
  if(endpos==0)
      error('unmatched "]"');
  end