3838import copy
3939import functools
4040import inspect
41+ import itertools
4142import json
4243import sys
4344
5354# pylint: disable=E0611,W0404
5455if sys .version_info >= (3 , 0 ):
5556 basestring = (bytes , str ) # pylint: disable=C0103,W0622
56- from itertools import zip_longest
57- else :
58- from itertools import izip_longest as zip_longest
5957
6058
6159class JsonPatchException (Exception ):
@@ -282,15 +280,15 @@ def compare_values(path, value, other):
282280 if value == other :
283281 return
284282 if isinstance (value , dict ) and isinstance (other , dict ):
285- for operation in compare_dict (path , value , other ):
283+ for operation in compare_dicts (path , value , other ):
286284 yield operation
287285 elif isinstance (value , list ) and isinstance (other , list ):
288- for operation in compare_list (path , value , other ):
286+ for operation in compare_lists (path , value , other ):
289287 yield operation
290288 else :
291289 yield {'op' : 'replace' , 'path' : '/' .join (path ), 'value' : other }
292290
293- def compare_dict (path , src , dst ):
291+ def compare_dicts (path , src , dst ):
294292 for key in src :
295293 if key not in dst :
296294 yield {'op' : 'remove' , 'path' : '/' .join (path + [key ])}
@@ -304,23 +302,10 @@ def compare_dict(path, src, dst):
304302 'path' : '/' .join (path + [key ]),
305303 'value' : dst [key ]}
306304
307- def compare_list (path , src , dst ):
308- lsrc , ldst = len (src ), len (dst )
309- for idx in range (min (lsrc , ldst )):
310- current = path + [str (idx )]
311- for operation in compare_values (current , src [idx ], dst [idx ]):
312- yield operation
313- if lsrc < ldst :
314- for idx in range (lsrc , ldst ):
315- current = path + [str (idx )]
316- yield {'op' : 'add' ,
317- 'path' : '/' .join (current ),
318- 'value' : dst [idx ]}
319- elif lsrc > ldst :
320- for idx in reversed (range (ldst , lsrc )):
321- yield {'op' : 'remove' , 'path' : '/' .join (path + [str (idx )])}
305+ def compare_lists (path , src , dst ):
306+ return _compare_lists (path , src , dst )
322307
323- return cls (list (compare_dict (['' ], src , dst )))
308+ return cls (list (compare_dicts (['' ], src , dst )))
324309
325310 def to_string (self ):
326311 """Returns patch set as JSON string."""
@@ -527,3 +512,230 @@ def apply(self, obj):
527512 }).apply (obj )
528513
529514 return obj
515+
516+
def _compare_lists(path, src, dst):
    """Produce the JSON patch operations that turn list `src` into `dst`.

    The work is done in three steps: the lists are split around their
    common runs by :func:`_split_by_common_seq`, the leftover ranges are
    turned into raw operations by :func:`_compare`, and those operations
    are post-processed by :func:`_optimize`.

    `path` is the list of path segments that lead to the compared list.
    The return value is whatever :func:`_optimize` returns.
    """
    # The split always comes back as a two-element [left, right] list.
    left, right = _split_by_common_seq(src, dst)
    raw_operations = _compare(path, src, dst, left, right)
    return _optimize(raw_operations)
520+
521+
522+ def _longest_common_subseq (src , dst ):
523+ """Returns pair of ranges of longest common subsequence for the `src`
524+ and `dst` lists.
525+
526+ >>> src = [1, 2, 3, 4]
527+ >>> dst = [0, 1, 2, 3, 5]
528+ >>> # The longest common subsequence for these lists is [1, 2, 3]
529+ ... # which is located at (0, 3) index range for src list and (1, 4) for
530+ ... # dst one. Tuple of these ranges we should get back.
531+ ... assert ((0, 3), (1, 4)) == _longest_common_subseq(src, dst)
532+ """
533+ lsrc , ldst = len (src ), len (dst )
534+ drange = list (range (ldst ))
535+ matrix = [[0 ] * ldst for _ in range (lsrc )]
536+ z = 0 # length of the longest subsequence
537+ range_src , range_dst = None , None
538+ for i , j in itertools .product (range (lsrc ), drange ):
539+ if src [i ] == dst [j ]:
540+ if i == 0 or j == 0 :
541+ matrix [i ][j ] = 1
542+ else :
543+ matrix [i ][j ] = matrix [i - 1 ][j - 1 ] + 1
544+ if matrix [i ][j ] > z :
545+ z = matrix [i ][j ]
546+ if matrix [i ][j ] == z :
547+ range_src = (i - z + 1 , i + 1 )
548+ range_dst = (j - z + 1 , j + 1 )
549+ else :
550+ matrix [i ][j ] = 0
551+ return range_src , range_dst
552+
553+
def _split_by_common_seq(src, dst, bx=(0, -1), by=(0, -1)):
    """Recursively split `src` and `dst` around their longest common runs.

    The longest common run of the two lists is located, and the parts to
    the left and to the right of it are processed the same way.  The
    common runs themselves are dropped; only the ranges that differ are
    kept.

    `bx` is the absolute ``(start, end)`` range that `src` occupies in the
    original source list, `by` is the same for `dst`.  The default
    ``(0, -1)`` stands for "the whole list": consumers of the result treat
    an end of ``-1`` as the list length.

    Returns a two-element list ``[left, right]``.  Each element is either
    another such list (produced by a deeper split), a ``(start, end)``
    range, or ``None`` for an empty range.  In a leaf pair the first item
    is a range of `src` and the second one a range of `dst`.

    Example: for ``src = [1, 2, 3, 8, 4]`` and ``dst = [0, 1, 2, 3, 9, 4, 5]``
    the result is::

        [[None, (0, 1)], [[(3, 4), (4, 5)], [None, (6, 7)]]]

    that is: ``dst[0:1]`` has no counterpart, ``src[3:4]`` faces
    ``dst[4:5]``, and ``dst[6:7]`` has no counterpart.
    """
    # An empty range holds nothing to compare; mark it with None so the
    # consumers can skip it.
    bx = bx if bx[0] != bx[1] else None
    by = by if by[0] != by[1] else None

    if not src:
        return [None, by]
    if not dst:
        return [bx, None]

    # These ranges are relative to the sublists handled by this call.
    x, y = _longest_common_subseq(src, dst)

    if x is None or y is None:  # nothing in common: both ranges are leaves
        return [bx, by]

    # Both lists are non-empty here, so neither range was collapsed to None.
    src_base, dst_base = bx[0], by[0]
    return [
        _split_by_common_seq(src[:x[0]], dst[:y[0]],
                             (src_base, src_base + x[0]),
                             (dst_base, dst_base + y[0])),
        # The `dst` range must be offset by the `dst` base.  Using the
        # `src` base here yields wrong indexes as soon as the two lists
        # are not aligned.
        _split_by_common_seq(src[x[1]:], dst[y[1]:],
                             (src_base + x[1], src_base + len(src)),
                             (dst_base + y[1], dst_base + len(dst))),
    ]
605+
606+
def _compare(path, src, dst, left, right):
    """Yield the operations of :func:`_compare_with_shift` without shifts.

    The comparison is started with an index shift of ``0``; every
    ``(operation, shift)`` pair it produces is reduced to the operation.
    """
    pairs = _compare_with_shift(path, src, dst, left, right, 0)
    for pair in pairs:
        yield pair[0]
611+
612+
613+ def _compare_with_shift (path , src , dst , left , right , shift ):
614+ """Recursively compares differences from `left` and `right` sides
615+ from common subsequences.
616+
617+ The `shift` parameter is used to store index shift which caused
618+ by ``add`` and ``remove`` operations.
619+
620+ Yields JSON patch operations and list index shift.
621+ """
622+ if isinstance (left , list ):
623+ for item , shift in _compare_with_shift (path , src , dst , * left ,
624+ shift = shift ):
625+ yield item , shift
626+ elif left is not None :
627+ for item , shift in _compare_left (path , src , left , shift ):
628+ yield item , shift
629+
630+ if isinstance (right , list ):
631+ for item , shift in _compare_with_shift (path , src , dst , * right ,
632+ shift = shift ):
633+ yield item , shift
634+ elif right is not None :
635+ for item , shift in _compare_right (path , dst , right , shift ):
636+ yield item , shift
637+
638+
639+ def _compare_left (path , src , left , shift ):
640+ """Yields JSON patch ``remove`` operations for elements that are only
641+ exists in the `src` list."""
642+ start , end = left
643+ if end == - 1 :
644+ end = len (src )
645+ # we need to `remove` elements from list tail to not deal with index shift
646+ for idx in reversed (range (start + shift , end + shift )):
647+ current = path + [str (idx )]
648+ yield (
649+ {'op' : 'remove' ,
650+ # yes, there should be any value field, but we'll use it
651+ # to apply `move` optimization a bit later and will remove
652+ # it in _optimize function.
653+ 'value' : src [idx - shift ],
654+ 'path' : '/' .join (current )},
655+ shift - 1
656+ )
657+ shift -= 1
658+
659+
660+ def _compare_right (path , dst , right , shift ):
661+ """Yields JSON patch ``add`` operations for elements that are only
662+ exists in the `dst` list"""
663+ start , end = right
664+ if end == - 1 :
665+ end = len (dst )
666+ for idx in range (start , end ):
667+ current = path + [str (idx )]
668+ yield (
669+ {'op' : 'add' , 'path' : '/' .join (current ), 'value' : dst [idx ]},
670+ shift + 1
671+ )
672+ shift += 1
673+
674+
675+ def _optimize (operations ):
676+ """Optimizes operations which was produced by lists comparison.
677+
678+ Actually it does two kinds of optimizations:
679+
680+ 1. Seeks pair of ``remove`` and ``add`` operations against the same path
681+ and replaces them with ``replace`` operation.
682+ 2. Seeks pair of ``remove`` and ``add`` operations for the same value
683+ and replaces them with ``move`` operation.
684+ """
685+ result = []
686+ ops_by_path = {}
687+ ops_by_value = {}
688+ add_remove = set (['add' , 'remove' ])
689+ for item in operations :
690+ # could we apply "move" optimization for dict values?
691+ hashable_value = not isinstance (item ['value' ], (dict , list ))
692+ if item ['path' ] in ops_by_path :
693+ _optimize_using_replace (ops_by_path [item ['path' ]], item )
694+ continue
695+ if hashable_value and item ['value' ] in ops_by_value :
696+ prev_item = ops_by_value [item ['value' ]]
697+ # ensure that we processing pair of add-remove ops
698+ if set ([item ['op' ], prev_item ['op' ]]) == add_remove :
699+ _optimize_using_move (prev_item , item )
700+ ops_by_value .pop (item ['value' ])
701+ continue
702+ result .append (item )
703+ ops_by_path [item ['path' ]] = item
704+ if hashable_value :
705+ ops_by_value [item ['value' ]] = item
706+
707+ # cleanup
708+ ops_by_path .clear ()
709+ ops_by_value .clear ()
710+ for item in result :
711+ if item ['op' ] == 'remove' :
712+ item .pop ('value' ) # strip our hack
713+ yield item
714+
715+
716+ def _optimize_using_replace (prev , cur ):
717+ """Optimises JSON patch by using ``replace`` operation instead of
718+ ``remove`` and ``add`` against the same path."""
719+ prev ['op' ] = 'replace'
720+ if cur ['op' ] == 'add' :
721+ prev ['value' ] = cur ['value' ]
722+
723+
724+ def _optimize_using_move (prev_item , item ):
725+ """Optimises JSON patch by using ``move`` operation instead of
726+ ``remove` and ``add`` against the different paths but for the same value."""
727+ prev_item ['op' ] = 'move'
728+ move_from , move_to = [
729+ (item ['path' ], prev_item ['path' ]),
730+ (prev_item ['path' ], item ['path' ]),
731+ ][item ['op' ] == 'add' ]
732+ if item ['op' ] == 'add' : # first was remove then add
733+ prev_item ['from' ] = move_from
734+ prev_item ['path' ] = move_to
735+ else : # first was add then remove
736+ head , move_from = move_from .rsplit ('/' , 1 )
737+ # since add operation was first it incremented
738+ # overall index shift value. we have to fix this
739+ move_from = int (move_from ) - 1
740+ prev_item ['from' ] = head + '/%d' % move_from
741+ prev_item ['path' ] = move_to
0 commit comments