loadubjson.m 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  function data = loadubjson(fname,varargin)
  %
  % data=loadubjson(fname,opt)
  %    or
  % data=loadubjson(fname,'param1',value1,'param2',value2,...)
  %
  % parse a UBJSON (Universal Binary JSON) file or string
  %
  % authors:Qianqian Fang (fangq<at> nmr.mgh.harvard.edu)
  % created on 2013/08/01
  %
  % $Id: loadubjson.m 460 2015-01-03 00:30:45Z fangq $
  %
  % input:
  %      fname: input file name; if fname contains any of the characters
  %             "{", "}", "[" or "]", fname is interpreted as a UBJSON
  %             string instead of a file name
  %      opt: a struct to store parsing options, opt can be replaced by
  %           a list of ('param',value) pairs - the param string is
  %           equivalent to a field in opt. opt can have the following
  %           fields (first in [.|.] is the default)
  %
  %           opt.SimplifyCell [0|1]: if set to 1, loadubjson will call cell2mat
  %                         for each element of the JSON data, and group
  %                         arrays based on the cell2mat rules.
  %           opt.IntEndian [B|L]: specify the endianness of the integer fields
  %                         in the UBJSON input data. B - Big-Endian format for
  %                         integers (as required in the UBJSON specification);
  %                         L - input integer fields are in Little-Endian order.
  %
  % output:
  %      data: the decoded content. {...} blocks are converted into structs
  %            and [...] blocks into cell arrays (or numeric arrays for
  %            typed containers, or when SimplifyCell is enabled). When the
  %            input holds exactly one top-level item that item is returned
  %            directly; otherwise a cell array with one entry per top-level
  %            item is returned (an empty cell for an empty input).
  %
  % examples:
  %      obj=struct('string','value','array',[1 2 3]);
  %      ubjdata=saveubjson('obj',obj);
  %      dat=loadubjson(ubjdata)
  %      dat=loadubjson(['examples' filesep 'example1.ubj'])
  %      dat=loadubjson(['examples' filesep 'example1.ubj'],'SimplifyCell',1)
  %
  % license:
  %     BSD, see LICENSE_BSD.txt files for details
  %
  % -- this function is part of JSONLab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
  %

  % Parser state is shared with the subfunctions below through globals.
  global pos inStr len esc index_esc len_esc isoct arraytoken fileendian systemendian

  if(regexp(fname,'[\{\}\]\[]','once'))
      string=fname;
  elseif(exist(fname,'file'))
      fid = fopen(fname,'rb');
      if(fid<0)
          % exist() also succeeds for folders/unreadable files; fail clearly
          error('can not open the input file');
      end
      string = fread(fid,inf,'uint8=>char')';
      fclose(fid);
  else
      error('input file does not exist');
  end

  pos = 1; len = length(string); inStr = string;
  isoct=exist('OCTAVE_VERSION','builtin');

  % Positions of brackets, quotes and escaped quotes, used by matching_bracket
  arraytoken=find(inStr=='[' | inStr==']' | inStr=='"');
  jstr=regexprep(inStr,'\\\\','  ');
  escquote=regexp(jstr,'\\"');
  arraytoken=sort([arraytoken escquote]);

  % String delimiters and escape chars identified to improve speed:
  esc = find(inStr=='"' | inStr=='\' ); % comparable to: regexp(inStr, '["\\]');
  index_esc = 1; len_esc = length(esc);

  opt=varargin2struct(varargin{:});
  fileendian=upper(jsonopt('IntEndian','B',opt));
  [os,maxelem,systemendian]=computer;

  % Start from an empty cell so an input without any top-level item
  % returns {} instead of failing on an undefined variable.
  data=cell(1,0);
  jsoncount=1;
  while pos <= len
      switch(next_char)
          case '{'
              data{jsoncount} = parse_object(opt);
          case '['
              data{jsoncount} = parse_array(opt);
          otherwise
              error_pos('Outer level structure must be an object or an array');
      end
      jsoncount=jsoncount+1;
  end % while

  % A single top-level item is returned as-is, not wrapped in a cell
  jsoncount=length(data);
  if(jsoncount==1 && iscell(data))
      data=data{1};
  end

  % Rebuild arrays stored with the _ArrayType_/_ArrayData_ annotation
  if(~isempty(data))
      if(isstruct(data)) % data can be a struct array
          data=jstruct2array(data);
      elseif(iscell(data))
          data=jcell2array(data);
      end
  end
  91. %%
  function newdata=parse_collection(id,data,obj)
  % Return DATA as a cell array: a non-cell DATA is wrapped into a 1x1 cell,
  % a cell DATA is returned unchanged, and a missing DATA yields an empty cell.
  %
  % id, obj: not used; kept so the signature stays unchanged.
  %
  % The earlier body read "jsoncount", which is not in scope in this
  % subfunction, and left "newdata" unassigned when DATA was already a cell.
  if(nargin<2)
      newdata=cell(1,0);
  elseif(iscell(data))
      newdata=data;
  else
      newdata=cell(1);
      newdata{1}=data;
  end
  100. %%
  function newdata=jcell2array(data)
  % Walk a cell array and convert annotated-array structs found inside it.
  %
  % Struct elements are passed through jstruct2array, nested cells are
  % processed recursively, and every other element is copied unchanged.
  newdata=data;
  for k=1:length(data)
      item=data{k};
      if(isstruct(item))
          newdata{k}=jstruct2array(item);
      elseif(iscell(item))
          newdata{k}=jcell2array(item);
      end
  end
  111. %%-------------------------------------------------------------------------
  function newdata=jstruct2array(data)
  % Convert structs carrying the JSONLab array annotation back into arrays.
  %
  % data:    a struct or struct array
  % newdata: DATA with nested structs converted depth-first. If DATA has
  %          both the x0x5F_ArrayType_ and x0x5F_ArrayData_ fields, it is
  %          replaced by the reconstructed array (a len-by-1 cell of arrays
  %          when DATA is a struct array with len~=1 elements).
  %
  % Optional annotation fields: x0x5F_ArraySize_, x0x5F_ArrayIsComplex_,
  % x0x5F_ArrayIsSparse_.
  %
  % Field presence is tested with isfield (exact match). The former
  % strmatch calls matched any field that merely started with the
  % annotation name, which then failed when the exact field was accessed.
  fn=fieldnames(data);
  newdata=data;
  len=length(data);
  for i=1:length(fn) % depth-first
      for j=1:len
          child=data(j).(fn{i});
          if(isstruct(child))
              newdata(j).(fn{i})=jstruct2array(child);
          end
      end
  end
  if(isfield(data,'x0x5F_ArrayType_') && isfield(data,'x0x5F_ArrayData_'))
      hascpx=isfield(data,'x0x5F_ArrayIsComplex_');
      hassparse=isfield(data,'x0x5F_ArrayIsSparse_');
      hassize=isfield(data,'x0x5F_ArraySize_');
      newdata=cell(len,1);
      for j=1:len
          ndata=cast(data(j).x0x5F_ArrayData_,data(j).x0x5F_ArrayType_);
          iscpx=0;
          if(hascpx)
              if(data(j).x0x5F_ArrayIsComplex_)
                  iscpx=1;
              end
          end
          issp=0;
          if(hassparse)
              if(data(j).x0x5F_ArrayIsSparse_)
                  issp=1;
              end
          end
          if(issp)
              if(hassize)
                  dim=double(data(j).x0x5F_ArraySize_);
                  % the last two columns hold the real and imaginary parts
                  if(iscpx && size(ndata,2)==4-any(dim==1))
                      ndata(:,end-1)=complex(ndata(:,end-1),ndata(:,end));
                  end
                  if isempty(ndata)
                      % All-zeros sparse
                      ndata=sparse(dim(1),prod(dim(2:end)));
                  elseif dim(1)==1
                      % Sparse row vector
                      ndata=sparse(1,ndata(:,1),ndata(:,2),dim(1),prod(dim(2:end)));
                  elseif dim(2)==1
                      % Sparse column vector
                      ndata=sparse(ndata(:,1),1,ndata(:,2),dim(1),prod(dim(2:end)));
                  else
                      % Generic sparse array.
                      ndata=sparse(ndata(:,1),ndata(:,2),ndata(:,3),dim(1),prod(dim(2:end)));
                  end
              else
                  if(iscpx && size(ndata,2)==4)
                      ndata(:,3)=complex(ndata(:,3),ndata(:,4));
                  end
                  ndata=sparse(ndata(:,1),ndata(:,2),ndata(:,3));
              end
          elseif(hassize)
              % Dense array. This branch is now also taken when the sparse
              % flag field exists but is false; previously such data was
              % left un-reshaped.
              if(iscpx && size(ndata,2)==2)
                  ndata=complex(ndata(:,1),ndata(:,2));
              end
              ndata=reshape(ndata(:),data(j).x0x5F_ArraySize_);
          end
          newdata{j}=ndata;
      end
      if(len==1)
          newdata=newdata{1};
      end
  end
  172. %%-------------------------------------------------------------------------
  function object = parse_object(varargin)
  % Parse a UBJSON object starting at the current position into a struct.
  %
  % varargin: parsing options, forwarded to parseStr/parse_value
  % object:   struct with one field per name/value pair (names are passed
  %           through valid_field); [] when the object has no pairs
  %
  % Names are read with parseStr, so they must start with an S, C or H
  % marker. An optional "$<type>" and "#<count>" header is recognised;
  % when a count is given no closing "}" is consumed.
  %
  % pos/inStr must be declared global here: the header handling below
  % reads and advances them directly.
  global pos inStr
  parse_char('{');
  object = [];
  type='';
  count=-1;
  if(next_char == '$')
      type=inStr(pos+1); % TODO: the declared value type is not applied yet
      pos=pos+2;
  end
  if(next_char == '#')
      pos=pos+1;
      count=double(parse_number());
  end
  num=0;
  while(count~=0) % a zero count means there is nothing to read
      c=next_char;
      if(count<0 && (isempty(c) || c=='}'))
          % end of an uncounted object (or end of input, reported below)
          break;
      end
      if(num>0 && count>=0 && ~isempty(c) && c=='}')
          break;
      end
      str = parseStr(varargin{:});
      if isempty(str)
          error_pos('Name of value at position %d cannot be empty');
      end
      val = parse_value(varargin{:});
      num=num+1;
      % dynamic field name instead of eval: same result, no code execution
      object.(valid_field(str)) = val;
      if(count>=0 && num>=count)
          break;
      end
  end
  if(count==-1)
      parse_char('}');
  end
  206. %%-------------------------------------------------------------------------
  function [cid,len]=elem_info(type)
  % Look up the MATLAB class name and the byte width of a UBJSON numeric
  % type marker (one of i, U, I, l, L, d, D).
  %
  % cid: class name ('int8','uint8','int16','int32','int64','single','double')
  % len: number of bytes per element
  % An unknown marker is reported through error_pos.
  markers='iUIlLdD';
  classes={'int8','uint8','int16','int32','int64','single','double'};
  widths=[1,1,2,4,8,4,8];
  idx=strfind(markers,type);
  if(isempty(idx))
      error_pos('unsupported type at position %d');
  end
  cid=classes{idx};
  len=widths(idx);
  217. %%-------------------------------------------------------------------------
  function [data adv]=parse_block(type,count,varargin)
  % Decode COUNT consecutive values of the UBJSON numeric TYPE, starting at
  % the current position, without advancing it.
  %
  % data: the decoded values, of the class given by elem_info(type)
  % adv:  number of bytes covered (the caller advances pos by this amount)
  global pos inStr isoct fileendian systemendian
  [klass,width]=elem_info(type);
  raw=inStr(pos:pos+width*count-1);
  if(isoct)
      bytes=int8(raw);
  else
      bytes=uint8(raw);
  end
  data=typecast(bytes,klass);
  % only the five integer types are byte-swapped; single/double are not
  id=strfind('iUIlLdD',type);
  if(id<=5 && fileendian~=systemendian)
      data=swapbytes(data);
  end
  adv=double(width*count);
  233. %%-------------------------------------------------------------------------
  function object = parse_array(varargin) % JSON array is written in row-major order
  % Parse a UBJSON array starting at the current position.
  %
  % varargin: parsing options. SimplifyCell (default 0) merges the elements
  %           with cell2mat when possible; SimplifyCellArray (default 1),
  %           when 0, keeps a cell if the merge would give a struct array.
  % object:   a numeric array for typed containers ("[$<type>..."),
  %           otherwise a cell array of the parsed elements (possibly
  %           simplified, see above)
  %
  % An optional "#<count>" or "#[dims]" header is recognised; when a count
  % is given no closing "]" is consumed.
  global pos inStr
  parse_char('[');
  object = cell(0, 1);
  dim=[];
  type='';
  count=-1;
  if(next_char == '$')
      type=inStr(pos+1);
      pos=pos+2;
  end
  if(next_char == '#')
      pos=pos+1;
      if(next_char=='[')
          dim=parse_array(varargin{:});
          if(iscell(dim))
              % an untyped dimension list comes back as a cell of scalars
              dim=cellfun(@double,dim);
          end
          dim=double(dim(:)');
          count=prod(dim);
      else
          count=double(parse_number());
      end
  end
  if(~isempty(type))
      if(count>=0)
          [object adv]=parse_block(type,count,varargin{:});
          if(numel(dim)>1)
              % reshape needs at least two sizes; a 1-element dim is left flat
              object=reshape(object,dim);
          end
          pos=pos+adv;
          return;
      else
          endpos=matching_bracket(inStr,pos);
          [cid,len]=elem_info(type);
          count=(endpos-pos)/len;
          [object adv]=parse_block(type,count,varargin{:});
          pos=pos+adv;
          parse_char(']');
          return;
      end
  end
  % Untyped elements: read exactly COUNT values when a count was given
  % (such arrays have no closing "]"), otherwise read until "]".
  num=0;
  while(count<0 || num<count)
      c=next_char;
      if(count<0 && (isempty(c) || c==']'))
          break;
      end
      val = parse_value(varargin{:});
      object{end+1} = val;
      num=num+1;
  end
  if(jsonopt('SimplifyCell',0,varargin{:})==1)
      try
          oldobj=object;
          object=cell2mat(object')';
          if(iscell(oldobj) && isstruct(object) && numel(object)>1 && jsonopt('SimplifyCellArray',1,varargin{:})==0)
              object=oldobj;
          elseif(size(object,1)>1 && ndims(object)==2)
              object=object';
          end
      catch
          % best effort: keep the cell array when it can not be merged
      end
  end
  if(count==-1)
      parse_char(']');
  end
  297. %%-------------------------------------------------------------------------
  function parse_char(c)
  % Consume the expected character C at the current position, skipping
  % white space before and after it; report an error through error_pos when
  % the input ends or a different character is found.
  global pos inStr len
  skip_whitespace;
  if(pos <= len && inStr(pos) == c)
      pos = pos + 1;
      skip_whitespace;
  else
      error_pos(sprintf('Expected %c at position %%d', c));
  end
  307. %%-------------------------------------------------------------------------
  function c = next_char
  % Skip white space and return the character at the current position
  % without consuming it; returns [] when the end of input is reached.
  global pos inStr len
  skip_whitespace;
  c = [];
  if(pos <= len)
      c = inStr(pos);
  end
  316. %%-------------------------------------------------------------------------
  function skip_whitespace
  % Advance the global position past any run of white-space characters
  % (as defined by isspace), stopping at the end of the input.
  global pos inStr len
  while(pos <= len)
      if(~isspace(inStr(pos)))
          break;
      end
      pos = pos + 1;
  end
  322. %%-------------------------------------------------------------------------
  function str = parseStr(varargin)
  % Read a string value at the current position and advance past it.
  %
  % Accepted markers: 'C' (a single character follows), 'S' and 'H'
  % (a length encoded as a UBJSON number follows, then that many bytes).
  % str: the characters read
  %
  % End of input and negative lengths are reported through error_pos
  % instead of causing an index error or moving the position backwards.
  global pos inStr
  if(pos>length(inStr))
      error_pos('String starting with S expected at position %d');
  end
  type=inStr(pos);
  if type ~= 'S' && type ~= 'C' && type ~= 'H'
      error_pos('String starting with S expected at position %d');
  end
  pos = pos + 1;
  if(type == 'C')
      if(pos>length(inStr))
          error_pos('End of file while expecting end of inStr');
      end
      str=inStr(pos);
      pos=pos+1;
      return;
  end
  bytelen=double(parse_number());
  if(bytelen<0)
      error_pos('Invalid string length at position %d');
  end
  if(length(inStr)>=pos+bytelen-1)
      str=inStr(pos:pos+bytelen-1);
      pos=pos+bytelen;
  else
      error_pos('End of file while expecting end of inStr');
  end
  344. %%-------------------------------------------------------------------------
  function num = parse_number(varargin)
  % Read one UBJSON number (type marker followed by its raw bytes) at the
  % current position and advance past it.
  %
  % num: the value, in the class matching the marker
  %      (i/U/I/l/L/d/D -> int8/uint8/int16/int32/int64/single/double)
  global pos inStr isoct fileendian systemendian
  id=strfind('iUIlLdD',inStr(pos));
  if(isempty(id))
      error_pos('expecting a number at position %d');
  end
  classes={'int8','uint8','int16','int32','int64','single','double'};
  widths=[1,1,2,4,8,4,8];
  klass=classes{id};
  nbytes=widths(id);
  raw=inStr(pos+1:pos+nbytes);
  if(isoct)
      bytes=int8(raw);
  else
      bytes=uint8(raw);
  end
  num=typecast(bytes,klass);
  % only the five integer types are byte-swapped; single/double are not
  if(id<=5 && fileendian~=systemendian)
      num=swapbytes(num);
  end
  pos = pos + nbytes + 1;
  364. %%-------------------------------------------------------------------------
  function val = parse_value(varargin)
  % Parse one value at the current position, dispatching on its marker.
  %
  %   S, C, H        -> string (parseStr)
  %   [              -> array (parse_array)
  %   {              -> object (parse_object); structs carrying the
  %                     x0x5F_ArrayType_ field are converted by
  %                     jstruct2array, an empty object becomes an empty struct
  %   i,U,I,l,L,d,D  -> number (parse_number)
  %   T / F          -> the doubles 1 / 0
  %   Z, N           -> []
  % Any other marker is reported through error_pos.
  global pos inStr
  marker=inStr(pos);
  switch(marker)
      case {'S','C','H'}
          val = parseStr(varargin{:});
      case '['
          val = parse_array(varargin{:});
      case '{'
          val = parse_object(varargin{:});
          if isstruct(val)
              if(isfield(val,'x0x5F_ArrayType_'))
                  val=jstruct2array(val);
              end
          elseif isempty(val)
              val = struct;
          end
      case {'i','U','I','l','L','d','D'}
          val = parse_number(varargin{:});
      case 'T'
          val = 1;
          pos = pos + 1;
      case 'F'
          val = 0;
          pos = pos + 1;
      case {'Z','N'}
          val = [];
          pos = pos + 1;
      otherwise
          error_pos('Value expected at position %d');
  end
  402. %%-------------------------------------------------------------------------
  function error_pos(msg)
  % Raise a parser error. MSG is formatted with the current position, and
  % a window of the input around that position is appended with the marker
  % <error> placed at the failing offset.
  global pos inStr len
  win = max(min([pos-15 pos-1 pos pos+20],len),1);
  if win(3) == win(2)
      win(3:4) = win(2)+[0 -1];  % display nothing after
  end
  before = inStr(win(1):win(2));
  after = inStr(win(3):win(4));
  msg = [sprintf(msg, pos) ': ' before '<error>' after];
  error( ['JSONparser:invalidFormat: ' msg] );
  412. %%-------------------------------------------------------------------------
  function str = valid_field(str)
  % Turn an arbitrary name into a valid MATLAB/Octave struct field name.
  %
  % From MATLAB doc: field names must begin with a letter, which may be
  % followed by any combination of letters, digits, and underscores.
  % A leading non-letter is replaced by the prefix "x0x[Hex code]_"; every
  % other character outside [0-9A-Za-z_] is replaced by "_0x[Hex code]_".
  % The global isoct selects the Octave code path (sprintf based) instead
  % of the MATLAB one (regexprep with dynamic expressions).
  global isoct
  if(~isempty(regexp(str,'^[^A-Za-z]','once')))
      if(~isoct)
          str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
      else
          str=sprintf('x0x%X_%s',char(str(1)),str(2:end));
      end
  end
  if(isempty(regexp(str,'[^0-9A-Za-z_]', 'once' )))
      return;
  end
  if(~isoct)
      str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
      return;
  end
  % Octave: copy the text between invalid characters and splice in the
  % hex escape for each of them, then append whatever follows the last one.
  src=str;
  bad=regexp(src,'[^0-9A-Za-z_]');
  out='';
  prev=0;
  for k=1:length(bad)
      out=[out src(prev+1:bad(k)-1) sprintf('_0x%X_',src(bad(k)))];
      prev=bad(k);
  end
  str=[out src(prev+1:length(src))];
  443. %str(~isletter(str) & ~('0' <= str & str <= '9')) = '_';
  444. %%-------------------------------------------------------------------------
  function endpos = matching_quote(str,pos)
  % Return the index of the first double quote in STR at or after POS that
  % is not immediately preceded by a backslash.
  %
  % Raises 'unmatched quotation mark' when no such quote exists.
  %
  % The scan includes the last character of STR; the previous loop bound
  % (pos<len) stopped one short and missed a quote in the final position.
  len=length(str);
  while(pos<=len)
      if(str(pos)=='"')
          if(~(pos>1 && str(pos-1)=='\'))
              endpos=pos;
              return;
          end
      end
      pos=pos+1;
  end
  error('unmatched quotation mark');
  457. %%-------------------------------------------------------------------------
  function [endpos e1l e1r maxlevel] = matching_bracket(str,pos)
  % Find the "]" that closes the bracket level open at position POS of STR,
  % using the precomputed token positions in the global arraytoken.
  %
  % endpos:   position in STR of the closing "]"
  % e1l, e1r: position of the first "[" / first "]" seen (or [])
  % maxlevel: deepest nesting level seen (the starting level counts as 1)
  %
  % Quoted sections are skipped with matching_quote. Raises
  % 'unmatched "]"' when no closing bracket is found.
  global arraytoken
  depth=1;
  maxlevel=depth;
  endpos=0;
  e1l=[];
  e1r=[];
  cand=arraytoken(arraytoken>=pos);
  toks=str(cand);
  ntok=length(toks);
  k=1;
  while(k<=ntok)
      switch(toks(k))
          case ']'
              depth=depth-1;
              if(isempty(e1r))
                  e1r=cand(k);
              end
              if(depth==0)
                  endpos=cand(k);
                  return
              end
          case '['
              if(isempty(e1l))
                  e1l=cand(k);
              end
              depth=depth+1;
              maxlevel=max(maxlevel,depth);
          case '"'
              k=matching_quote(toks,k+1);
      end
      k=k+1;
  end
  if(endpos==0)
      error('unmatched "]"');
  end
  491. end