@@ -617,117 +617,162 @@ static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
617617 }
618618}
619619
620- /* Multi-cpu list version. */
620+ #define CPU_MONDO_COUNTER(cpuid) (cpu_mondo_counter[cpuid])
621+ #define MONDO_USEC_WAIT_MIN 2
622+ #define MONDO_USEC_WAIT_MAX 100
623+ #define MONDO_RETRY_LIMIT 500000
624+
625+ /* Multi-cpu list version.
626+ *
627+ * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
628+ * Sometimes not all cpus receive the mondo, requiring us to re-send
629+ * the mondo until all cpus have received, or cpus are truly stuck
630+ * unable to receive mondo, and we timeout.
631+ * Occasionally a target cpu strand is borrowed briefly by hypervisor to
632+ * perform guest service, such as PCIe error handling. Considering the
633+ * service time, a 1 second overall wait is reasonable for 1 cpu.
634+ * Two wait times between mondo checks are defined here: 2 usec for a
635+ * single cpu quick turnaround and up to 100 usec for a large cpu count.
636+ * Delivering mondos to a large number of cpus can take longer, so we adjust
637+ * the retry count as long as target cpus are making forward progress.
638+ */
621639static void hypervisor_xcall_deliver (struct trap_per_cpu * tb , int cnt )
622640{
623- int retries , this_cpu , prev_sent , i , saw_cpu_error ;
641+ int this_cpu , tot_cpus , prev_sent , i , rem ;
642+ int usec_wait , retries , tot_retries ;
643+ u16 first_cpu = 0xffff ;
644+ unsigned long xc_rcvd = 0 ;
624645 unsigned long status ;
646+ int ecpuerror_id = 0 ;
647+ int enocpu_id = 0 ;
625648 u16 * cpu_list ;
649+ u16 cpu ;
626650
627651 this_cpu = smp_processor_id ();
628-
629652 cpu_list = __va (tb -> cpu_list_pa );
630-
631- saw_cpu_error = 0 ;
632- retries = 0 ;
653+ usec_wait = cnt * MONDO_USEC_WAIT_MIN ;
654+ if (usec_wait > MONDO_USEC_WAIT_MAX )
655+ usec_wait = MONDO_USEC_WAIT_MAX ;
656+ retries = tot_retries = 0 ;
657+ tot_cpus = cnt ;
633658 prev_sent = 0 ;
659+
634660 do {
635- int forward_progress , n_sent ;
661+ int n_sent , mondo_delivered , target_cpu_busy ;
636662
637663 status = sun4v_cpu_mondo_send (cnt ,
638664 tb -> cpu_list_pa ,
639665 tb -> cpu_mondo_block_pa );
640666
641667 /* HV_EOK means all cpus received the xcall, we're done. */
642668 if (likely (status == HV_EOK ))
643- break ;
669+ goto xcall_done ;
670+
671+ /* If not these non-fatal errors, panic */
672+ if (unlikely ((status != HV_EWOULDBLOCK ) &&
673+ (status != HV_ECPUERROR ) &&
674+ (status != HV_ENOCPU )))
675+ goto fatal_errors ;
644676
645677 /* First, see if we made any forward progress.
678+ *
679+ * Go through the cpu_list, count the target cpus that have
680+ * received our mondo (n_sent), and those that did not (rem).
681+ * Re-pack cpu_list with the cpus that remain to be retried at the
682+ * front - this simplifies tracking the truly stalled cpus.
646683 *
647684 * The hypervisor indicates successful sends by setting
648685 * cpu list entries to the value 0xffff.
686+ *
687+ * EWOULDBLOCK means some target cpus did not receive the
688+ * mondo and retry usually helps.
689+ *
690+ * ECPUERROR means at least one target cpu is in error state,
691+ * it's usually safe to skip the faulty cpu and retry.
692+ *
693+ * ENOCPU means one of the target cpus doesn't belong to the
694+ * domain, perhaps offlined, which is unexpected but not
695+ * fatal; it's okay to skip the offlined cpu.
649696 */
697+ rem = 0 ;
650698 n_sent = 0 ;
651699 for (i = 0 ; i < cnt ; i ++ ) {
652- if (likely (cpu_list [i ] == 0xffff ))
700+ cpu = cpu_list [i ];
701+ if (likely (cpu == 0xffff )) {
653702 n_sent ++ ;
703+ } else if ((status == HV_ECPUERROR ) &&
704+ (sun4v_cpu_state (cpu ) == HV_CPU_STATE_ERROR )) {
705+ ecpuerror_id = cpu + 1 ;
706+ } else if (status == HV_ENOCPU && !cpu_online (cpu )) {
707+ enocpu_id = cpu + 1 ;
708+ } else {
709+ cpu_list [rem ++ ] = cpu ;
710+ }
654711 }
655712
656- forward_progress = 0 ;
657- if (n_sent > prev_sent )
658- forward_progress = 1 ;
713+ /* No cpu remained, we're done. */
714+ if (rem == 0 )
715+ break ;
659716
660- prev_sent = n_sent ;
717+ /* Otherwise, update the cpu count for retry. */
718+ cnt = rem ;
661719
662- /* If we get a HV_ECPUERROR, then one or more of the cpus
663- * in the list are in error state. Use the cpu_state()
664- * hypervisor call to find out which cpus are in error state.
720+ /* Record the overall number of mondos received by the
721+ * first of the remaining cpus.
665722 */
666- if (unlikely ( status == HV_ECPUERROR ) ) {
667- for ( i = 0 ; i < cnt ; i ++ ) {
668- long err ;
669- u16 cpu ;
723+ if (first_cpu != cpu_list [ 0 ] ) {
724+ first_cpu = cpu_list [ 0 ];
725+ xc_rcvd = CPU_MONDO_COUNTER ( first_cpu ) ;
726+ }
670727
671- cpu = cpu_list [ i ];
672- if ( cpu == 0xffff )
673- continue ;
728+ /* Was any mondo delivered successfully? */
729+ mondo_delivered = ( n_sent > prev_sent );
730+ prev_sent = n_sent ;
674731
675- err = sun4v_cpu_state (cpu );
676- if (err == HV_CPU_STATE_ERROR ) {
677- saw_cpu_error = (cpu + 1 );
678- cpu_list [i ] = 0xffff ;
679- }
680- }
681- } else if (unlikely (status != HV_EWOULDBLOCK ))
682- goto fatal_mondo_error ;
732+ /* or, was any target cpu busy processing other mondos? */
733+ target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER (first_cpu ));
734+ xc_rcvd = CPU_MONDO_COUNTER (first_cpu );
683735
684- /* Don't bother rewriting the CPU list, just leave the
685- * 0xffff and non-0xffff entries in there and the
686- * hypervisor will do the right thing.
687- *
688- * Only advance timeout state if we didn't make any
689- * forward progress.
736+ /* Retry count is for no progress. If we're making progress,
737+ * reset the retry count.
690738 */
691- if (unlikely (!forward_progress )) {
692- if (unlikely (++ retries > 10000 ))
693- goto fatal_mondo_timeout ;
694-
695- /* Delay a little bit to let other cpus catch up
696- * on their cpu mondo queue work.
697- */
698- udelay (2 * cnt );
739+ if (likely (mondo_delivered || target_cpu_busy )) {
740+ tot_retries += retries ;
741+ retries = 0 ;
742+ } else if (unlikely (retries > MONDO_RETRY_LIMIT )) {
743+ goto fatal_mondo_timeout ;
699744 }
700- } while (1 );
701745
702- if (unlikely (saw_cpu_error ))
703- goto fatal_mondo_cpu_error ;
746+ /* Delay a little bit to let other cpus catch up on
747+ * their cpu mondo queue work.
748+ */
749+ if (!mondo_delivered )
750+ udelay (usec_wait );
704751
705- return ;
752+ retries ++ ;
753+ } while (1 );
706754
707- fatal_mondo_cpu_error :
708- printk (KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
709- "(including %d) were in error state\n" ,
710- this_cpu , saw_cpu_error - 1 );
755+ xcall_done :
756+ if (unlikely (ecpuerror_id > 0 )) {
757+ pr_crit ("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n" ,
758+ this_cpu , ecpuerror_id - 1 );
759+ } else if (unlikely (enocpu_id > 0 )) {
760+ pr_crit ("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n" ,
761+ this_cpu , enocpu_id - 1 );
762+ }
711763 return ;
712764
765+ fatal_errors :
766+ /* fatal errors include bad alignment, etc */
767+ pr_crit ("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n" ,
768+ this_cpu , tot_cpus , tb -> cpu_list_pa , tb -> cpu_mondo_block_pa );
769+ panic ("Unexpected SUN4V mondo error %lu\n" , status );
770+
713771fatal_mondo_timeout :
714- printk (KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
715- " progress after %d retries.\n" ,
716- this_cpu , retries );
717- goto dump_cpu_list_and_out ;
718-
719- fatal_mondo_error :
720- printk (KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n" ,
721- this_cpu , status );
722- printk (KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
723- "mondo_block_pa(%lx)\n" ,
724- this_cpu , cnt , tb -> cpu_list_pa , tb -> cpu_mondo_block_pa );
725-
726- dump_cpu_list_and_out :
727- printk (KERN_CRIT "CPU[%d]: CPU list [ " , this_cpu );
728- for (i = 0 ; i < cnt ; i ++ )
729- printk ("%u " , cpu_list [i ]);
730- printk ("]\n" );
772+ /* some cpus being non-responsive to the cpu mondo */
773+ pr_crit ("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n" ,
774+ this_cpu , first_cpu , (tot_retries + retries ), tot_cpus );
775+ panic ("SUN4V mondo timeout panic\n" );
731776}
732777
733778static void (* xcall_deliver_impl )(struct trap_per_cpu * , int );
0 commit comments