diff options
| -rw-r--r-- | series | 5 | ||||
| -rw-r--r-- | staging/staging-add-echo-cancelation-module.patch | 1847 | ||||
| -rw-r--r-- | staging/staging-slicoss-fix-warnings-due-to-static-usage.patch | 83 | ||||
| -rw-r--r-- | usb/usb-ehci-fix-remote-wakeup-support-for-arc-tdi-core.patch | 151 | ||||
| -rw-r--r-- | usb/usb-snoop-processes-opening-usbfs-device-files.patch | 31 |
5 files changed, 2117 insertions, 0 deletions
@@ -157,6 +157,8 @@ usb/usb-anchor-api-changes-needed-for-btusb.patch usb/usb-ehci-ohci-uhci-remove-version-numbers.patch usb/usb-ehci-log-a-warning-if-ehci-hcd-is-not-loaded-first.patch usb/usb-option-anydata-new-modem-same-id.patch +usb/usb-snoop-processes-opening-usbfs-device-files.patch +usb/usb-ehci-fix-remote-wakeup-support-for-arc-tdi-core.patch usb/fsl_usb2_udc-make-dr_ep_setup-function-static.patch usb/fsl_usb2_udc-remove-check-for-udc-null-in-dr_controller_setup.patch @@ -209,10 +211,12 @@ staging/usb-usb-ip-add-client-driver.patch staging/usb-usb-ip-add-host-driver.patch staging/staging-add-w35und-wifi-driver.patch staging/staging-add-wlan-ng-prism2-usb-driver.patch +staging/staging-add-echo-cancelation-module.patch staging/staging-fix-gcc-warnings-in-sxg.patch staging/staging-go7007-v4l-fixes.patch staging/staging-slicoss-lots-of-checkpatch-fixes.patch +staging/staging-slicoss-fix-warnings-due-to-static-usage.patch # can not even compile :( #staging/usb-add-princeton-instruments-usb-camera-driver.patch @@ -240,3 +244,4 @@ staging/staging-slicoss-lots-of-checkpatch-fixes.patch #f.patch + diff --git a/staging/staging-add-echo-cancelation-module.patch b/staging/staging-add-echo-cancelation-module.patch new file mode 100644 index 00000000000000..f884ab02231cfb --- /dev/null +++ b/staging/staging-add-echo-cancelation-module.patch @@ -0,0 +1,1847 @@ +From b853721638c84e44b05757e093074f93cdbfcb56 Mon Sep 17 00:00:00 2001 +From: David Rowe <david@rowetel.com> +Date: Mon, 6 Oct 2008 21:41:46 -0700 +Subject: Staging: add echo cancelation module + +This is used by mISDN and Zaptel drivers. 
+ +From: Steve Underwood <steveu@coppice.org> +From: David Rowe <david@rowetel.com> +Cc: Tzafrir Cohen <tzafrir.cohen@xorcom.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/staging/Kconfig | 2 + drivers/staging/Makefile | 1 + drivers/staging/echo/Kconfig | 9 + drivers/staging/echo/Makefile | 1 + drivers/staging/echo/TODO | 10 + drivers/staging/echo/bit_operations.h | 253 +++++++++++++ + drivers/staging/echo/echo.c | 632 ++++++++++++++++++++++++++++++++++ + drivers/staging/echo/echo.h | 220 +++++++++++ + drivers/staging/echo/fir.h | 369 +++++++++++++++++++ + drivers/staging/echo/mmx.h | 288 +++++++++++++++ + 10 files changed, 1785 insertions(+) + +--- /dev/null ++++ b/drivers/staging/echo/bit_operations.h +@@ -0,0 +1,253 @@ ++/* ++ * SpanDSP - a series of DSP components for telephony ++ * ++ * bit_operations.h - Various bit level operations, such as bit reversal ++ * ++ * Written by Steve Underwood <steveu@coppice.org> ++ * ++ * Copyright (C) 2006 Steve Underwood ++ * ++ * All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * $Id: bit_operations.h,v 1.11 2006/11/28 15:37:03 steveu Exp $ ++ */ ++ ++/*! \file */ ++ ++#if !defined(_BIT_OPERATIONS_H_) ++#define _BIT_OPERATIONS_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#if defined(__i386__) || defined(__x86_64__) ++/*! 
\brief Find the bit position of the highest set bit in a word ++ \param bits The word to be searched ++ \return The bit number of the highest set bit, or -1 if the word is zero. */ ++static __inline__ int top_bit(unsigned int bits) ++{ ++ int res; ++ ++ __asm__ (" xorl %[res],%[res];\n" ++ " decl %[res];\n" ++ " bsrl %[bits],%[res]\n" ++ : [res] "=&r" (res) ++ : [bits] "rm" (bits)); ++ return res; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++/*! \brief Find the bit position of the lowest set bit in a word ++ \param bits The word to be searched ++ \return The bit number of the lowest set bit, or -1 if the word is zero. */ ++static __inline__ int bottom_bit(unsigned int bits) ++{ ++ int res; ++ ++ __asm__ (" xorl %[res],%[res];\n" ++ " decl %[res];\n" ++ " bsfl %[bits],%[res]\n" ++ : [res] "=&r" (res) ++ : [bits] "rm" (bits)); ++ return res; ++} ++/*- End of function --------------------------------------------------------*/ ++#else ++static __inline__ int top_bit(unsigned int bits) ++{ ++ int i; ++ ++ if (bits == 0) ++ return -1; ++ i = 0; ++ if (bits & 0xFFFF0000) ++ { ++ bits &= 0xFFFF0000; ++ i += 16; ++ } ++ if (bits & 0xFF00FF00) ++ { ++ bits &= 0xFF00FF00; ++ i += 8; ++ } ++ if (bits & 0xF0F0F0F0) ++ { ++ bits &= 0xF0F0F0F0; ++ i += 4; ++ } ++ if (bits & 0xCCCCCCCC) ++ { ++ bits &= 0xCCCCCCCC; ++ i += 2; ++ } ++ if (bits & 0xAAAAAAAA) ++ { ++ bits &= 0xAAAAAAAA; ++ i += 1; ++ } ++ return i; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ int bottom_bit(unsigned int bits) ++{ ++ int i; ++ ++ if (bits == 0) ++ return -1; ++ i = 32; ++ if (bits & 0x0000FFFF) ++ { ++ bits &= 0x0000FFFF; ++ i -= 16; ++ } ++ if (bits & 0x00FF00FF) ++ { ++ bits &= 0x00FF00FF; ++ i -= 8; ++ } ++ if (bits & 0x0F0F0F0F) ++ { ++ bits &= 0x0F0F0F0F; ++ i -= 4; ++ } ++ if (bits & 0x33333333) ++ { ++ bits &= 0x33333333; ++ i -= 2; ++ } ++ if (bits & 0x55555555) ++ { ++ bits &= 
0x55555555; ++ i -= 1; ++ } ++ return i; ++} ++/*- End of function --------------------------------------------------------*/ ++#endif ++ ++/*! \brief Bit reverse a byte. ++ \param data The byte to be reversed. ++ \return The bit reversed version of data. */ ++static __inline__ uint8_t bit_reverse8(uint8_t x) ++{ ++#if defined(__i386__) || defined(__x86_64__) ++ /* If multiply is fast */ ++ return ((x*0x0802U & 0x22110U) | (x*0x8020U & 0x88440U))*0x10101U >> 16; ++#else ++ /* If multiply is slow, but we have a barrel shifter */ ++ x = (x >> 4) | (x << 4); ++ x = ((x & 0xCC) >> 2) | ((x & 0x33) << 2); ++ return ((x & 0xAA) >> 1) | ((x & 0x55) << 1); ++#endif ++} ++/*- End of function --------------------------------------------------------*/ ++ ++/*! \brief Bit reverse a 16 bit word. ++ \param data The word to be reversed. ++ \return The bit reversed version of data. */ ++uint16_t bit_reverse16(uint16_t data); ++ ++/*! \brief Bit reverse a 32 bit word. ++ \param data The word to be reversed. ++ \return The bit reversed version of data. */ ++uint32_t bit_reverse32(uint32_t data); ++ ++/*! \brief Bit reverse each of the four bytes in a 32 bit word. ++ \param data The word to be reversed. ++ \return The bit reversed version of data. */ ++uint32_t bit_reverse_4bytes(uint32_t data); ++ ++/*! \brief Find the number of set bits in a 32 bit word. ++ \param x The word to be searched. ++ \return The number of set bits. */ ++int one_bits32(uint32_t x); ++ ++/*! \brief Create a mask as wide as the number in a 32 bit word. ++ \param x The word to be searched. ++ \return The mask. */ ++uint32_t make_mask32(uint32_t x); ++ ++/*! \brief Create a mask as wide as the number in a 16 bit word. ++ \param x The word to be searched. ++ \return The mask. */ ++uint16_t make_mask16(uint16_t x); ++ ++/*! \brief Find the least significant one in a word, and return a word ++ with just that bit set. ++ \param x The word to be searched. ++ \return The word with the single set bit. 
*/ ++static __inline__ uint32_t least_significant_one32(uint32_t x) ++{ ++ return (x & (-(int32_t) x)); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++/*! \brief Find the most significant one in a word, and return a word ++ with just that bit set. ++ \param x The word to be searched. ++ \return The word with the single set bit. */ ++static __inline__ uint32_t most_significant_one32(uint32_t x) ++{ ++#if defined(__i386__) || defined(__x86_64__) ++ return 1 << top_bit(x); ++#else ++ x = make_mask32(x); ++ return (x ^ (x >> 1)); ++#endif ++} ++/*- End of function --------------------------------------------------------*/ ++ ++/*! \brief Find the parity of a byte. ++ \param x The byte to be checked. ++ \return 1 for odd, or 0 for even. */ ++static __inline__ int parity8(uint8_t x) ++{ ++ x = (x ^ (x >> 4)) & 0x0F; ++ return (0x6996 >> x) & 1; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++/*! \brief Find the parity of a 16 bit word. ++ \param x The word to be checked. ++ \return 1 for odd, or 0 for even. */ ++static __inline__ int parity16(uint16_t x) ++{ ++ x ^= (x >> 8); ++ x = (x ^ (x >> 4)) & 0x0F; ++ return (0x6996 >> x) & 1; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++/*! \brief Find the parity of a 32 bit word. ++ \param x The word to be checked. ++ \return 1 for odd, or 0 for even. */ ++static __inline__ int parity32(uint32_t x) ++{ ++ x ^= (x >> 16); ++ x ^= (x >> 8); ++ x = (x ^ (x >> 4)) & 0x0F; ++ return (0x6996 >> x) & 1; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif ++/*- End of file ------------------------------------------------------------*/ +--- /dev/null ++++ b/drivers/staging/echo/echo.c +@@ -0,0 +1,632 @@ ++/* ++ * SpanDSP - a series of DSP components for telephony ++ * ++ * echo.c - A line echo canceller. 
This code is being developed ++ * against and partially complies with G168. ++ * ++ * Written by Steve Underwood <steveu@coppice.org> ++ * and David Rowe <david_at_rowetel_dot_com> ++ * ++ * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe ++ * ++ * Based on a bit from here, a bit from there, eye of toad, ear of ++ * bat, 15 years of failed attempts by David and a few fried brain ++ * cells. ++ * ++ * All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * $Id: echo.c,v 1.20 2006/12/01 18:00:48 steveu Exp $ ++ */ ++ ++/*! \file */ ++ ++/* Implementation Notes ++ David Rowe ++ April 2007 ++ ++ This code started life as Steve's NLMS algorithm with a tap ++ rotation algorithm to handle divergence during double talk. I ++ added a Geigel Double Talk Detector (DTD) [2] and performed some ++ G168 tests. However I had trouble meeting the G168 requirements, ++ especially for double talk - there were always cases where my DTD ++ failed, for example where near end speech was under the 6dB ++ threshold required for declaring double talk. ++ ++ So I tried a two path algorithm [1], which has so far given better ++ results. The original tap rotation/Geigel algorithm is available ++ in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit. ++ It's probably possible to make it work if some one wants to put some ++ serious work into it. 
++ ++ At present no special treatment is provided for tones, which ++ generally cause NLMS algorithms to diverge. Initial runs of a ++ subset of the G168 tests for tones (e.g. ./echo_test 6) show the ++ current algorithm is passing OK, which is kind of surprising. The ++ full set of tests needs to be performed to confirm this result. ++ ++ One other interesting change is that I have managed to get the NLMS ++ code to work with 16 bit coefficients, rather than the original 32 ++ bit coefficients. This reduces the MIPs and storage required. ++ I evaluated the 16 bit port using g168_tests.sh and listening tests ++ on 4 real-world samples. ++ ++ I also attempted the implementation of a block based NLMS update ++ [2] but although this passes g168_tests.sh it didn't converge well ++ on the real-world samples. I have no idea why, perhaps a scaling ++ problem. The block based code is also available in SVN ++ http://svn.rowetel.com/software/oslec/tags/before_16bit. If this ++ code can be debugged, it will lead to further reduction in MIPS, as ++ the block update code maps nicely onto DSP instruction sets (it's a ++ dot product) compared to the current sample-by-sample update. ++ ++ Steve also has some nice notes on echo cancellers in echo.h ++ ++ ++ References: ++ ++ [1] Ochiai, Araseki, and Ogihara, "Echo Canceller with Two Echo ++ Path Models", IEEE Transactions on Communications, COM-25, ++ No. 6, June ++ 1977.
++ http://www.rowetel.com/images/echo/dual_path_paper.pdf ++ ++ [2] The classic, very useful paper that tells you how to ++ actually build a real world echo canceller: ++ Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice ++ Echo Canceller with a TMS320020, ++ http://www.rowetel.com/images/echo/spra129.pdf ++ ++ [3] I have written a series of blog posts on this work, here is ++ Part 1: http://www.rowetel.com/blog/?p=18 ++ ++ [4] The source code http://svn.rowetel.com/software/oslec/ ++ ++ [5] A nice reference on LMS filters: ++ http://en.wikipedia.org/wiki/Least_mean_squares_filter ++ ++ Credits: ++ ++ Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan ++ Muthukrishnan for their suggestions and email discussions. Thanks ++ also to those people who collected echo samples for me such as ++ Mark, Pawel, and Pavel. ++*/ ++ ++#include <linux/kernel.h> /* We're doing kernel work */ ++#include <linux/module.h> ++#include <linux/kernel.h> ++#include <linux/slab.h> ++#define malloc(a) kmalloc((a), GFP_KERNEL) ++#define free(a) kfree(a) ++ ++#include "bit_operations.h" ++#include "echo.h" ++ ++#define MIN_TX_POWER_FOR_ADAPTION 64 ++#define MIN_RX_POWER_FOR_ADAPTION 64 ++#define DTD_HANGOVER 600 /* 600 samples, or 75ms */ ++#define DC_LOG2BETA 3 /* log2() of DC filter Beta */ ++ ++/*-----------------------------------------------------------------------*\ ++ FUNCTIONS ++\*-----------------------------------------------------------------------*/ ++ ++/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ ++ ++ ++#ifdef __BLACKFIN_ASM__ ++static void __inline__ lms_adapt_bg(echo_can_state_t *ec, int clean, int shift) ++{ ++ int i, j; ++ int offset1; ++ int offset2; ++ int factor; ++ int exp; ++ int16_t *phist; ++ int n; ++ ++ if (shift > 0) ++ factor = clean << shift; ++ else ++ factor = clean >> -shift; ++ ++ /* Update the FIR taps */ ++ ++ offset2 = ec->curr_pos; ++ offset1 = ec->taps - offset2; ++ phist = 
&ec->fir_state_bg.history[offset2]; ++ ++ /* st: and en: help us locate the assembler in echo.s */ ++ ++ //asm("st:"); ++ n = ec->taps; ++ for (i = 0, j = offset2; i < n; i++, j++) ++ { ++ exp = *phist++ * factor; ++ ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15); ++ } ++ //asm("en:"); ++ ++ /* Note the asm for the inner loop above generated by Blackfin gcc ++ 4.1.1 is pretty good (note even parallel instructions used): ++ ++ R0 = W [P0++] (X); ++ R0 *= R2; ++ R0 = R0 + R3 (NS) || ++ R1 = W [P1] (X) || ++ nop; ++ R0 >>>= 15; ++ R0 = R0 + R1; ++ W [P1++] = R0; ++ ++ A block based update algorithm would be much faster but the ++ above can't be improved on much. Every instruction saved in ++ the loop above is 2 MIPs/ch! The for loop above is where the ++ Blackfin spends most of it's time - about 17 MIPs/ch measured ++ with speedtest.c with 256 taps (32ms). Write-back and ++ Write-through cache gave about the same performance. ++ */ ++} ++ ++/* ++ IDEAS for further optimisation of lms_adapt_bg(): ++ ++ 1/ The rounding is quite costly. Could we keep as 32 bit coeffs ++ then make filter pluck the MS 16-bits of the coeffs when filtering? ++ However this would lower potential optimisation of filter, as I ++ think the dual-MAC architecture requires packed 16 bit coeffs. ++ ++ 2/ Block based update would be more efficient, as per comments above, ++ could use dual MAC architecture. ++ ++ 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC ++ packing. ++ ++ 4/ Execute the whole e/c in a block of say 20ms rather than sample ++ by sample. Processing a few samples every ms is inefficient. 
++*/ ++ ++#else ++static __inline__ void lms_adapt_bg(echo_can_state_t *ec, int clean, int shift) ++{ ++ int i; ++ ++ int offset1; ++ int offset2; ++ int factor; ++ int exp; ++ ++ if (shift > 0) ++ factor = clean << shift; ++ else ++ factor = clean >> -shift; ++ ++ /* Update the FIR taps */ ++ ++ offset2 = ec->curr_pos; ++ offset1 = ec->taps - offset2; ++ ++ for (i = ec->taps - 1; i >= offset1; i--) ++ { ++ exp = (ec->fir_state_bg.history[i - offset1]*factor); ++ ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15); ++ } ++ for ( ; i >= 0; i--) ++ { ++ exp = (ec->fir_state_bg.history[i + offset2]*factor); ++ ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15); ++ } ++} ++#endif ++ ++/*- End of function --------------------------------------------------------*/ ++ ++echo_can_state_t *echo_can_create(int len, int adaption_mode) ++{ ++ echo_can_state_t *ec; ++ int i; ++ int j; ++ ++ ec = kmalloc(sizeof(*ec), GFP_KERNEL); ++ if (ec == NULL) ++ return NULL; ++ memset(ec, 0, sizeof(*ec)); ++ ++ ec->taps = len; ++ ec->log2taps = top_bit(len); ++ ec->curr_pos = ec->taps - 1; ++ ++ for (i = 0; i < 2; i++) ++ { ++ if ((ec->fir_taps16[i] = (int16_t *) malloc((ec->taps)*sizeof(int16_t))) == NULL) ++ { ++ for (j = 0; j < i; j++) ++ kfree(ec->fir_taps16[j]); ++ kfree(ec); ++ return NULL; ++ } ++ memset(ec->fir_taps16[i], 0, (ec->taps)*sizeof(int16_t)); ++ } ++ ++ fir16_create(&ec->fir_state, ++ ec->fir_taps16[0], ++ ec->taps); ++ fir16_create(&ec->fir_state_bg, ++ ec->fir_taps16[1], ++ ec->taps); ++ ++ for(i=0; i<5; i++) { ++ ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; ++ } ++ ++ ec->cng_level = 1000; ++ echo_can_adaption_mode(ec, adaption_mode); ++ ++ ec->snapshot = (int16_t*)malloc(ec->taps*sizeof(int16_t)); ++ memset(ec->snapshot, 0, sizeof(int16_t)*ec->taps); ++ ++ ec->cond_met = 0; ++ ec->Pstates = 0; ++ ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0; ++ ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0; ++ ec->tx_1 = ec->tx_2 = 
ec->rx_1 = ec->rx_2 = 0; ++ ec->Lbgn = ec->Lbgn_acc = 0; ++ ec->Lbgn_upper = 200; ++ ec->Lbgn_upper_acc = ec->Lbgn_upper << 13; ++ ++ return ec; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++void echo_can_free(echo_can_state_t *ec) ++{ ++ int i; ++ ++ fir16_free(&ec->fir_state); ++ fir16_free(&ec->fir_state_bg); ++ for (i = 0; i < 2; i++) ++ kfree(ec->fir_taps16[i]); ++ kfree(ec->snapshot); ++ kfree(ec); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++void echo_can_adaption_mode(echo_can_state_t *ec, int adaption_mode) ++{ ++ ec->adaption_mode = adaption_mode; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++void echo_can_flush(echo_can_state_t *ec) ++{ ++ int i; ++ ++ ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0; ++ ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0; ++ ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; ++ ++ ec->Lbgn = ec->Lbgn_acc = 0; ++ ec->Lbgn_upper = 200; ++ ec->Lbgn_upper_acc = ec->Lbgn_upper << 13; ++ ++ ec->nonupdate_dwell = 0; ++ ++ fir16_flush(&ec->fir_state); ++ fir16_flush(&ec->fir_state_bg); ++ ec->fir_state.curr_pos = ec->taps - 1; ++ ec->fir_state_bg.curr_pos = ec->taps - 1; ++ for (i = 0; i < 2; i++) ++ memset(ec->fir_taps16[i], 0, ec->taps*sizeof(int16_t)); ++ ++ ec->curr_pos = ec->taps - 1; ++ ec->Pstates = 0; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++void echo_can_snapshot(echo_can_state_t *ec) { ++ memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps*sizeof(int16_t)); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++/* Dual Path Echo Canceller ------------------------------------------------*/ ++ ++int16_t echo_can_update(echo_can_state_t *ec, int16_t tx, int16_t rx) ++{ ++ int32_t echo_value; ++ int clean_bg; ++ int tmp, tmp1; ++ ++ /* Input scaling was found be required to prevent problems when tx ++ 
starts clipping. Another possible way to handle this would be the ++ filter coefficent scaling. */ ++ ++ ec->tx = tx; ec->rx = rx; ++ tx >>=1; ++ rx >>=1; ++ ++ /* ++ Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required ++ otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta) ++ only real axis. Some chip sets (like Si labs) don't need ++ this, but something like a $10 X100P card does. Any DC really slows ++ down convergence. ++ ++ Note: removes some low frequency from the signal, this reduces ++ the speech quality when listening to samples through headphones ++ but may not be obvious through a telephone handset. ++ ++ Note that the 3dB frequency in radians is approx Beta, e.g. for ++ Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz. ++ */ ++ ++ if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) { ++ tmp = rx << 15; ++#if 1 ++ /* Make sure the gain of the HPF is 1.0. This can still saturate a little under ++ impulse conditions, and it might roll to 32768 and need clipping on sustained peak ++ level signals. However, the scale of such clipping is small, and the error due to ++ any saturation should not markedly affect the downstream processing. */ ++ tmp -= (tmp >> 4); ++#endif ++ ec->rx_1 += -(ec->rx_1>>DC_LOG2BETA) + tmp - ec->rx_2; ++ ++ /* hard limit filter to prevent clipping. Note that at this stage ++ rx should be limited to +/- 16383 due to right shift above */ ++ tmp1 = ec->rx_1 >> 15; ++ if (tmp1 > 16383) tmp1 = 16383; ++ if (tmp1 < -16383) tmp1 = -16383; ++ rx = tmp1; ++ ec->rx_2 = tmp; ++ } ++ ++ /* Block average of power in the filter states. Used for ++ adaption power calculation. */ ++ ++ { ++ int new, old; ++ ++ /* efficient "out with the old and in with the new" algorithm so ++ we don't have to recalculate over the whole block of ++ samples. 
*/ ++ new = (int)tx * (int)tx; ++ old = (int)ec->fir_state.history[ec->fir_state.curr_pos] * ++ (int)ec->fir_state.history[ec->fir_state.curr_pos]; ++ ec->Pstates += ((new - old) + (1<<ec->log2taps)) >> ec->log2taps; ++ if (ec->Pstates < 0) ec->Pstates = 0; ++ } ++ ++ /* Calculate short term average levels using simple single pole IIRs */ ++ ++ ec->Ltxacc += abs(tx) - ec->Ltx; ++ ec->Ltx = (ec->Ltxacc + (1<<4)) >> 5; ++ ec->Lrxacc += abs(rx) - ec->Lrx; ++ ec->Lrx = (ec->Lrxacc + (1<<4)) >> 5; ++ ++ /* Foreground filter ---------------------------------------------------*/ ++ ++ ec->fir_state.coeffs = ec->fir_taps16[0]; ++ echo_value = fir16(&ec->fir_state, tx); ++ ec->clean = rx - echo_value; ++ ec->Lcleanacc += abs(ec->clean) - ec->Lclean; ++ ec->Lclean = (ec->Lcleanacc + (1<<4)) >> 5; ++ ++ /* Background filter ---------------------------------------------------*/ ++ ++ echo_value = fir16(&ec->fir_state_bg, tx); ++ clean_bg = rx - echo_value; ++ ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg; ++ ec->Lclean_bg = (ec->Lclean_bgacc + (1<<4)) >> 5; ++ ++ /* Background Filter adaption -----------------------------------------*/ ++ ++ /* Almost always adap bg filter, just simple DT and energy ++ detection to minimise adaption in cases of strong double talk. ++ However this is not critical for the dual path algorithm. ++ */ ++ ec->factor = 0; ++ ec->shift = 0; ++ if ((ec->nonupdate_dwell == 0)) { ++ int P, logP, shift; ++ ++ /* Determine: ++ ++ f = Beta * clean_bg_rx/P ------ (1) ++ ++ where P is the total power in the filter states. ++ ++ The Boffins have shown that if we obey (1) we converge ++ quickly and avoid instability. 
++ ++ The correct factor f must be in Q30, as this is the fixed ++ point format required by the lms_adapt_bg() function, ++ therefore the scaled version of (1) is: ++ ++ (2^30) * f = (2^30) * Beta * clean_bg_rx/P ++ factor = (2^30) * Beta * clean_bg_rx/P ----- (2) ++ ++ We have chosen Beta = 0.25 by experiment, so: ++ ++ factor = (2^30) * (2^-2) * clean_bg_rx/P ++ ++ (30 - 2 - log2(P)) ++ factor = clean_bg_rx 2 ----- (3) ++ ++ To avoid a divide we approximate log2(P) as top_bit(P), ++ which returns the position of the highest non-zero bit in ++ P. This approximation introduces an error as large as a ++ factor of 2, but the algorithm seems to handle it OK. ++ ++ Come to think of it a divide may not be a big deal on a ++ modern DSP, so its probably worth checking out the cycles ++ for a divide versus a top_bit() implementation. ++ */ ++ ++ P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates; ++ logP = top_bit(P) + ec->log2taps; ++ shift = 30 - 2 - logP; ++ ec->shift = shift; ++ ++ lms_adapt_bg(ec, clean_bg, shift); ++ } ++ ++ /* very simple DTD to make sure we dont try and adapt with strong ++ near end speech */ ++ ++ ec->adapt = 0; ++ if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx)) ++ ec->nonupdate_dwell = DTD_HANGOVER; ++ if (ec->nonupdate_dwell) ++ ec->nonupdate_dwell--; ++ ++ /* Transfer logic ------------------------------------------------------*/ ++ ++ /* These conditions are from the dual path paper [1], I messed with ++ them a bit to improve performance. 
*/ ++ ++ if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) && ++ (ec->nonupdate_dwell == 0) && ++ (8*ec->Lclean_bg < 7*ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ && ++ (8*ec->Lclean_bg < ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ ) ++ { ++ if (ec->cond_met == 6) { ++ /* BG filter has had better results for 6 consecutive samples */ ++ ec->adapt = 1; ++ memcpy(ec->fir_taps16[0], ec->fir_taps16[1], ec->taps*sizeof(int16_t)); ++ } ++ else ++ ec->cond_met++; ++ } ++ else ++ ec->cond_met = 0; ++ ++ /* Non-Linear Processing ---------------------------------------------------*/ ++ ++ ec->clean_nlp = ec->clean; ++ if (ec->adaption_mode & ECHO_CAN_USE_NLP) ++ { ++ /* Non-linear processor - a fancy way to say "zap small signals, to avoid ++ residual echo due to (uLaw/ALaw) non-linearity in the channel.". */ ++ ++ if ((16*ec->Lclean < ec->Ltx)) ++ { ++ /* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB, ++ so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */ ++ if (ec->adaption_mode & ECHO_CAN_USE_CNG) ++ { ++ ec->cng_level = ec->Lbgn; ++ ++ /* Very elementary comfort noise generation. Just random ++ numbers rolled off very vaguely Hoth-like. DR: This ++ noise doesn't sound quite right to me - I suspect there ++ are some overlfow issues in the filtering as it's too ++ "crackly". TODO: debug this, maybe just play noise at ++ high level or look at spectrum. 
++ */ ++ ++ ec->cng_rndnum = 1664525U*ec->cng_rndnum + 1013904223U; ++ ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5*ec->cng_filter) >> 3; ++ ec->clean_nlp = (ec->cng_filter*ec->cng_level*8) >> 14; ++ ++ } ++ else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) ++ { ++ /* This sounds much better than CNG */ ++ if (ec->clean_nlp > ec->Lbgn) ++ ec->clean_nlp = ec->Lbgn; ++ if (ec->clean_nlp < -ec->Lbgn) ++ ec->clean_nlp = -ec->Lbgn; ++ } ++ else ++ { ++ /* just mute the residual, doesn't sound very good, used mainly ++ in G168 tests */ ++ ec->clean_nlp = 0; ++ } ++ } ++ else { ++ /* Background noise estimator. I tried a few algorithms ++ here without much luck. This very simple one seems to ++ work best, we just average the level using a slow (1 sec ++ time const) filter if the current level is less than a ++ (experimentally derived) constant. This means we dont ++ include high level signals like near end speech. When ++ combined with CNG or especially CLIP seems to work OK. ++ */ ++ if (ec->Lclean < 40) { ++ ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn; ++ ec->Lbgn = (ec->Lbgn_acc + (1<<11)) >> 12; ++ } ++ } ++ } ++ ++ /* Roll around the taps buffer */ ++ if (ec->curr_pos <= 0) ++ ec->curr_pos = ec->taps; ++ ec->curr_pos--; ++ ++ if (ec->adaption_mode & ECHO_CAN_DISABLE) ++ ec->clean_nlp = rx; ++ ++ /* Output scaled back up again to match input scaling */ ++ ++ return (int16_t) ec->clean_nlp << 1; ++} ++ ++/*- End of function --------------------------------------------------------*/ ++ ++/* This function is seperated from the echo canceller is it is usually called ++ as part of the tx process. See rx HP (DC blocking) filter above, it's ++ the same design. ++ ++ Some soft phones send speech signals with a lot of low frequency ++ energy, e.g. down to 20Hz. This can make the hybrid non-linear ++ which causes the echo canceller to fall over. This filter can help ++ by removing any low frequency before it gets to the tx port of the ++ hybrid. 
++ ++ It can also help by removing and DC in the tx signal. DC is bad ++ for LMS algorithms. ++ ++ This is one of the classic DC removal filters, adjusted to provide sufficient ++ bass rolloff to meet the above requirement to protect hybrids from things that ++ upset them. The difference between successive samples produces a lousy HPF, and ++ then a suitably placed pole flattens things out. The final result is a nicely ++ rolled off bass end. The filtering is implemented with extended fractional ++ precision, which noise shapes things, giving very clean DC removal. ++*/ ++ ++int16_t echo_can_hpf_tx(echo_can_state_t *ec, int16_t tx) { ++ int tmp, tmp1; ++ ++ if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) { ++ tmp = tx << 15; ++#if 1 ++ /* Make sure the gain of the HPF is 1.0. The first can still saturate a little under ++ impulse conditions, and it might roll to 32768 and need clipping on sustained peak ++ level signals. However, the scale of such clipping is small, and the error due to ++ any saturation should not markedly affect the downstream processing. */ ++ tmp -= (tmp >> 4); ++#endif ++ ec->tx_1 += -(ec->tx_1>>DC_LOG2BETA) + tmp - ec->tx_2; ++ tmp1 = ec->tx_1 >> 15; ++ if (tmp1 > 32767) tmp1 = 32767; ++ if (tmp1 < -32767) tmp1 = -32767; ++ tx = tmp1; ++ ec->tx_2 = tmp; ++ } ++ ++ return tx; ++} +--- /dev/null ++++ b/drivers/staging/echo/echo.h +@@ -0,0 +1,220 @@ ++/* ++ * SpanDSP - a series of DSP components for telephony ++ * ++ * echo.c - A line echo canceller. This code is being developed ++ * against and partially complies with G168. ++ * ++ * Written by Steve Underwood <steveu@coppice.org> ++ * and David Rowe <david_at_rowetel_dot_com> ++ * ++ * Copyright (C) 2001 Steve Underwood and 2007 David Rowe ++ * ++ * All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2, as ++ * published by the Free Software Foundation. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * $Id: echo.h,v 1.9 2006/10/24 13:45:28 steveu Exp $ ++ */ ++ ++#ifndef __ECHO_H ++#define __ECHO_H ++ ++/*! \page echo_can_page Line echo cancellation for voice ++ ++\section echo_can_page_sec_1 What does it do? ++This module aims to provide G.168-2002 compliant echo cancellation, to remove ++electrical echoes (e.g. from 2-4 wire hybrids) from voice calls. ++ ++\section echo_can_page_sec_2 How does it work? ++The heart of the echo canceller is an FIR filter. This is adapted to match the ++echo impulse response of the telephone line. It must be long enough to ++adequately cover the duration of that impulse response. The signal transmitted ++to the telephone line is passed through the FIR filter. Once the FIR is ++properly adapted, the resulting output is an estimate of the echo signal ++received from the line. This is subtracted from the received signal. The result ++is an estimate of the signal which originated at the far end of the line, free ++from echoes of our own transmitted signal. ++ ++The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and ++was introduced in 1960. It is the commonest form of filter adaption used in ++things like modem line equalisers and line echo cancellers. There it works very ++well. However, it only works well for signals of constant amplitude. It works ++very poorly for things like speech echo cancellation, where the signal level ++varies widely. This is quite easy to fix.
If the signal level is normalised - ++similar to applying AGC - LMS can work as well for a signal of varying ++amplitude as it does for a modem signal. This normalised least mean squares ++(NLMS) algorithm is the commonest one used for speech echo cancellation. Many ++other algorithms exist - e.g. RLS (essentially the same as Kalman filtering), ++FAP, etc. Some perform significantly better than NLMS. However, factors such ++as computational complexity and patents favour the use of NLMS. ++ ++A simple refinement to NLMS can improve its performance with speech. NLMS tends ++to adapt best to the strongest parts of a signal. If the signal is white noise, ++the NLMS algorithm works very well. However, speech has more low frequency than ++high frequency content. Pre-whitening (i.e. filtering the signal to flatten its ++spectrum) the echo signal improves the adapt rate for speech, and ensures the ++final residual signal is not heavily biased towards high frequencies. A very ++low complexity filter is adequate for this, so pre-whitening adds little to the ++compute requirements of the echo canceller. ++ ++An FIR filter adapted using pre-whitened NLMS performs well, provided certain ++conditions are met: ++ ++ - The transmitted signal has poor self-correlation. ++ - There is no signal being generated within the environment being ++ cancelled. ++ ++The difficulty is that neither of these can be guaranteed. ++ ++If the adaption is performed while transmitting noise (or something fairly ++noise like, such as voice) the adaption works very well. If the adaption is ++performed while transmitting something highly correlative (typically narrow ++band energy such as signalling tones or DTMF), the adaption can go seriously ++wrong. The reason is there is only one solution for the adaption on a near ++random signal - the impulse response of the line. 
For a repetitive signal, ++there are any number of solutions which converge the adaption, and nothing ++guides the adaption to choose the generalised one. Allowing an untrained ++canceller to converge on this kind of narrowband energy is probably a good thing, ++since at least it cancels the tones. Allowing a well converged canceller to ++continue converging on such energy is just a way to ruin its generalised ++adaption. A narrowband detector is needed, so adaption can be suspended at ++appropriate times. ++ ++The adaption process is based on trying to eliminate the received signal. When ++there is any signal from within the environment being cancelled it may upset ++the adaption process. Similarly, if the signal we are transmitting is small, ++noise may dominate and disturb the adaption process. If we can ensure that the ++adaption is only performed when we are transmitting a significant signal level, ++and the environment is not, things will be OK. Clearly, it is easy to tell when ++we are sending a significant signal. Telling if the environment is generating ++a significant signal, and doing it with sufficient speed that the adaption will ++not have diverged too much before we stop it, is a little harder. ++ ++The key problem in detecting when the environment is sourcing significant ++energy is that we must do this very quickly. Given a reasonably long sample of ++the received signal, there are a number of strategies which may be used to ++assess whether that signal contains a strong far end component. However, by the ++time that assessment is complete the far end signal will have already caused ++major mis-convergence in the adaption process. An assessment algorithm is ++needed which produces a fairly accurate result from a very short burst of far ++end energy. ++ ++\section echo_can_page_sec_3 How do I use it? ++The echo canceller processes both the transmit and receive streams sample by ++sample. The processing function is not declared inline.
Unfortunately, ++cancellation requires many operations per sample, so the call overhead is only ++a minor burden. ++*/ ++ ++#include "fir.h" ++ ++/* Mask bits for the adaption mode */ ++#define ECHO_CAN_USE_ADAPTION 0x01 ++#define ECHO_CAN_USE_NLP 0x02 ++#define ECHO_CAN_USE_CNG 0x04 ++#define ECHO_CAN_USE_CLIP 0x08 ++#define ECHO_CAN_USE_TX_HPF 0x10 ++#define ECHO_CAN_USE_RX_HPF 0x20 ++#define ECHO_CAN_DISABLE 0x40 ++ ++/*! ++ G.168 echo canceller descriptor. This defines the working state for a line ++ echo canceller. ++*/ ++typedef struct ++{ ++ int16_t tx,rx; ++ int16_t clean; ++ int16_t clean_nlp; ++ ++ int nonupdate_dwell; ++ int curr_pos; ++ int taps; ++ int log2taps; ++ int adaption_mode; ++ ++ int cond_met; ++ int32_t Pstates; ++ int16_t adapt; ++ int32_t factor; ++ int16_t shift; ++ ++ /* Average levels and averaging filter states */ ++ int Ltxacc, Lrxacc, Lcleanacc, Lclean_bgacc; ++ int Ltx, Lrx; ++ int Lclean; ++ int Lclean_bg; ++ int Lbgn, Lbgn_acc, Lbgn_upper, Lbgn_upper_acc; ++ ++ /* foreground and background filter states */ ++ fir16_state_t fir_state; ++ fir16_state_t fir_state_bg; ++ int16_t *fir_taps16[2]; ++ ++ /* DC blocking filter states */ ++ int tx_1, tx_2, rx_1, rx_2; ++ ++ /* optional High Pass Filter states */ ++ int32_t xvtx[5], yvtx[5]; ++ int32_t xvrx[5], yvrx[5]; ++ ++ /* Parameters for the optional Hoth noise generator */ ++ int cng_level; ++ int cng_rndnum; ++ int cng_filter; ++ ++ /* snapshot sample of coeffs used for development */ ++ int16_t *snapshot; ++} echo_can_state_t; ++ ++/*! Create a voice echo canceller context. ++ \param len The length of the canceller, in samples. ++ \return The new canceller context, or NULL if the canceller could not be created. ++*/ ++echo_can_state_t *echo_can_create(int len, int adaption_mode); ++ ++/*! Free a voice echo canceller context. ++ \param ec The echo canceller context. ++*/ ++void echo_can_free(echo_can_state_t *ec); ++ ++/*! Flush (reinitialise) a voice echo canceller context. 
++ \param ec The echo canceller context. ++*/ ++void echo_can_flush(echo_can_state_t *ec); ++ ++/*! Set the adaption mode of a voice echo canceller context. ++ \param ec The echo canceller context. ++ \param adapt The mode. ++*/ ++void echo_can_adaption_mode(echo_can_state_t *ec, int adaption_mode); ++ ++void echo_can_snapshot(echo_can_state_t *ec); ++ ++/*! Process a sample through a voice echo canceller. ++ \param ec The echo canceller context. ++ \param tx The transmitted audio sample. ++ \param rx The received audio sample. ++ \return The clean (echo cancelled) received sample. ++*/ ++int16_t echo_can_update(echo_can_state_t *ec, int16_t tx, int16_t rx); ++ ++/*! Process to high pass filter the tx signal. ++ \param ec The echo canceller context. ++ \param tx The transmitted auio sample. ++ \return The HP filtered transmit sample, send this to your D/A. ++*/ ++int16_t echo_can_hpf_tx(echo_can_state_t *ec, int16_t tx); ++ ++#endif /* __ECHO_H */ +--- /dev/null ++++ b/drivers/staging/echo/fir.h +@@ -0,0 +1,369 @@ ++/* ++ * SpanDSP - a series of DSP components for telephony ++ * ++ * fir.h - General telephony FIR routines ++ * ++ * Written by Steve Underwood <steveu@coppice.org> ++ * ++ * Copyright (C) 2002 Steve Underwood ++ * ++ * All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2, as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ * ++ * $Id: fir.h,v 1.8 2006/10/24 13:45:28 steveu Exp $ ++ */ ++ ++/*! \page fir_page FIR filtering ++\section fir_page_sec_1 What does it do? ++???. ++ ++\section fir_page_sec_2 How does it work? ++???. ++*/ ++ ++#if !defined(_FIR_H_) ++#define _FIR_H_ ++ ++/* ++ Blackfin NOTES & IDEAS: ++ ++ A simple dot product function is used to implement the filter. This performs ++ just one MAC/cycle which is inefficient but was easy to implement as a first ++ pass. The current Blackfin code also uses an unrolled form of the filter ++ history to avoid 0 length hardware loop issues. This is wasteful of ++ memory. ++ ++ Ideas for improvement: ++ ++ 1/ Rewrite filter for dual MAC inner loop. The issue here is handling ++ history sample offsets that are 16 bit aligned - the dual MAC needs ++ 32 bit alignment. There are some good examples in libbfdsp. ++ ++ 2/ Use the hardware circular buffer facility to halve memory usage. ++ ++ 3/ Consider using internal memory. ++ ++ Using less memory might also improve speed as cache misses will be ++ reduced. A drop in MIPs and memory approaching 50% should be ++ possible. ++ ++ The foreground and background filters currently use a total of ++ about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo ++ can. ++*/ ++ ++#if defined(USE_MMX) || defined(USE_SSE2) ++#include "mmx.h" ++#endif ++ ++/*! ++ 16 bit integer FIR descriptor. This defines the working state for a single ++ instance of an FIR filter using 16 bit integer coefficients. ++*/ ++typedef struct ++{ ++ int taps; ++ int curr_pos; ++ const int16_t *coeffs; ++ int16_t *history; ++} fir16_state_t; ++ ++/*! ++ 32 bit integer FIR descriptor. This defines the working state for a single ++ instance of an FIR filter using 32 bit integer coefficients, and filtering ++ 16 bit integer data. ++*/ ++typedef struct ++{ ++ int taps; ++ int curr_pos; ++ const int32_t *coeffs; ++ int16_t *history; ++} fir32_state_t; ++ ++/*! ++ Floating point FIR descriptor.
This defines the working state for a single ++ instance of an FIR filter using floating point coefficients and data. ++*/ ++typedef struct ++{ ++ int taps; ++ int curr_pos; ++ const float *coeffs; ++ float *history; ++} fir_float_state_t; ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++static __inline__ const int16_t *fir16_create(fir16_state_t *fir, ++ const int16_t *coeffs, ++ int taps) ++{ ++ fir->taps = taps; ++ fir->curr_pos = taps - 1; ++ fir->coeffs = coeffs; ++#if defined(USE_MMX) || defined(USE_SSE2) || defined(__BLACKFIN_ASM__) ++ if ((fir->history = malloc(2*taps*sizeof(int16_t)))) ++ memset(fir->history, 0, 2*taps*sizeof(int16_t)); ++#else ++ if ((fir->history = (int16_t *) malloc(taps*sizeof(int16_t)))) ++ memset(fir->history, 0, taps*sizeof(int16_t)); ++#endif ++ return fir->history; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ void fir16_flush(fir16_state_t *fir) ++{ ++#if defined(USE_MMX) || defined(USE_SSE2) || defined(__BLACKFIN_ASM__) ++ memset(fir->history, 0, 2*fir->taps*sizeof(int16_t)); ++#else ++ memset(fir->history, 0, fir->taps*sizeof(int16_t)); ++#endif ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ void fir16_free(fir16_state_t *fir) ++{ ++ free(fir->history); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++#ifdef __BLACKFIN_ASM__ ++static inline int32_t dot_asm(short *x, short *y, int len) ++{ ++ int dot; ++ ++ len--; ++ ++ __asm__ ++ ( ++ "I0 = %1;\n\t" ++ "I1 = %2;\n\t" ++ "A0 = 0;\n\t" ++ "R0.L = W[I0++] || R1.L = W[I1++];\n\t" ++ "LOOP dot%= LC0 = %3;\n\t" ++ "LOOP_BEGIN dot%=;\n\t" ++ "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" ++ "LOOP_END dot%=;\n\t" ++ "A0 += R0.L*R1.L (IS);\n\t" ++ "R0 = A0;\n\t" ++ "%0 = R0;\n\t" ++ : "=&d" (dot) ++ : "a" (x), "a" (y), "a" (len) ++ : "I0", "I1", "A1", "A0", "R0", "R1" ++ ); ++ ++ return dot; ++} ++#endif 
++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample) ++{ ++ int32_t y; ++#if defined(USE_MMX) ++ int i; ++ mmx_t *mmx_coeffs; ++ mmx_t *mmx_hist; ++ ++ fir->history[fir->curr_pos] = sample; ++ fir->history[fir->curr_pos + fir->taps] = sample; ++ ++ mmx_coeffs = (mmx_t *) fir->coeffs; ++ mmx_hist = (mmx_t *) &fir->history[fir->curr_pos]; ++ i = fir->taps; ++ pxor_r2r(mm4, mm4); ++ /* 8 samples per iteration, so the filter must be a multiple of 8 long. */ ++ while (i > 0) ++ { ++ movq_m2r(mmx_coeffs[0], mm0); ++ movq_m2r(mmx_coeffs[1], mm2); ++ movq_m2r(mmx_hist[0], mm1); ++ movq_m2r(mmx_hist[1], mm3); ++ mmx_coeffs += 2; ++ mmx_hist += 2; ++ pmaddwd_r2r(mm1, mm0); ++ pmaddwd_r2r(mm3, mm2); ++ paddd_r2r(mm0, mm4); ++ paddd_r2r(mm2, mm4); ++ i -= 8; ++ } ++ movq_r2r(mm4, mm0); ++ psrlq_i2r(32, mm0); ++ paddd_r2r(mm0, mm4); ++ movd_r2m(mm4, y); ++ emms(); ++#elif defined(USE_SSE2) ++ int i; ++ xmm_t *xmm_coeffs; ++ xmm_t *xmm_hist; ++ ++ fir->history[fir->curr_pos] = sample; ++ fir->history[fir->curr_pos + fir->taps] = sample; ++ ++ xmm_coeffs = (xmm_t *) fir->coeffs; ++ xmm_hist = (xmm_t *) &fir->history[fir->curr_pos]; ++ i = fir->taps; ++ pxor_r2r(xmm4, xmm4); ++ /* 16 samples per iteration, so the filter must be a multiple of 16 long. 
*/ ++ while (i > 0) ++ { ++ movdqu_m2r(xmm_coeffs[0], xmm0); ++ movdqu_m2r(xmm_coeffs[1], xmm2); ++ movdqu_m2r(xmm_hist[0], xmm1); ++ movdqu_m2r(xmm_hist[1], xmm3); ++ xmm_coeffs += 2; ++ xmm_hist += 2; ++ pmaddwd_r2r(xmm1, xmm0); ++ pmaddwd_r2r(xmm3, xmm2); ++ paddd_r2r(xmm0, xmm4); ++ paddd_r2r(xmm2, xmm4); ++ i -= 16; ++ } ++ movdqa_r2r(xmm4, xmm0); ++ psrldq_i2r(8, xmm0); ++ paddd_r2r(xmm0, xmm4); ++ movdqa_r2r(xmm4, xmm0); ++ psrldq_i2r(4, xmm0); ++ paddd_r2r(xmm0, xmm4); ++ movd_r2m(xmm4, y); ++#elif defined(__BLACKFIN_ASM__) ++ fir->history[fir->curr_pos] = sample; ++ fir->history[fir->curr_pos + fir->taps] = sample; ++ y = dot_asm((int16_t*)fir->coeffs, &fir->history[fir->curr_pos], fir->taps); ++#else ++ int i; ++ int offset1; ++ int offset2; ++ ++ fir->history[fir->curr_pos] = sample; ++ ++ offset2 = fir->curr_pos; ++ offset1 = fir->taps - offset2; ++ y = 0; ++ for (i = fir->taps - 1; i >= offset1; i--) ++ y += fir->coeffs[i]*fir->history[i - offset1]; ++ for ( ; i >= 0; i--) ++ y += fir->coeffs[i]*fir->history[i + offset2]; ++#endif ++ if (fir->curr_pos <= 0) ++ fir->curr_pos = fir->taps; ++ fir->curr_pos--; ++ return (int16_t) (y >> 15); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ const int16_t *fir32_create(fir32_state_t *fir, ++ const int32_t *coeffs, ++ int taps) ++{ ++ fir->taps = taps; ++ fir->curr_pos = taps - 1; ++ fir->coeffs = coeffs; ++ fir->history = (int16_t *) malloc(taps*sizeof(int16_t)); ++ if (fir->history) ++ memset(fir->history, '\0', taps*sizeof(int16_t)); ++ return fir->history; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ void fir32_flush(fir32_state_t *fir) ++{ ++ memset(fir->history, 0, fir->taps*sizeof(int16_t)); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ void fir32_free(fir32_state_t *fir) ++{ ++ free(fir->history); ++} ++/*- End of 
function --------------------------------------------------------*/ ++ ++static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample) ++{ ++ int i; ++ int32_t y; ++ int offset1; ++ int offset2; ++ ++ fir->history[fir->curr_pos] = sample; ++ offset2 = fir->curr_pos; ++ offset1 = fir->taps - offset2; ++ y = 0; ++ for (i = fir->taps - 1; i >= offset1; i--) ++ y += fir->coeffs[i]*fir->history[i - offset1]; ++ for ( ; i >= 0; i--) ++ y += fir->coeffs[i]*fir->history[i + offset2]; ++ if (fir->curr_pos <= 0) ++ fir->curr_pos = fir->taps; ++ fir->curr_pos--; ++ return (int16_t) (y >> 15); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++#ifndef __KERNEL__ ++static __inline__ const float *fir_float_create(fir_float_state_t *fir, ++ const float *coeffs, ++ int taps) ++{ ++ fir->taps = taps; ++ fir->curr_pos = taps - 1; ++ fir->coeffs = coeffs; ++ fir->history = (float *) malloc(taps*sizeof(float)); ++ if (fir->history) ++ memset(fir->history, '\0', taps*sizeof(float)); ++ return fir->history; ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ void fir_float_free(fir_float_state_t *fir) ++{ ++ free(fir->history); ++} ++/*- End of function --------------------------------------------------------*/ ++ ++static __inline__ int16_t fir_float(fir_float_state_t *fir, int16_t sample) ++{ ++ int i; ++ float y; ++ int offset1; ++ int offset2; ++ ++ fir->history[fir->curr_pos] = sample; ++ ++ offset2 = fir->curr_pos; ++ offset1 = fir->taps - offset2; ++ y = 0; ++ for (i = fir->taps - 1; i >= offset1; i--) ++ y += fir->coeffs[i]*fir->history[i - offset1]; ++ for ( ; i >= 0; i--) ++ y += fir->coeffs[i]*fir->history[i + offset2]; ++ if (fir->curr_pos <= 0) ++ fir->curr_pos = fir->taps; ++ fir->curr_pos--; ++ return (int16_t) y; ++} ++/*- End of function --------------------------------------------------------*/ ++#endif ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif ++/*- End of file 
------------------------------------------------------------*/ +--- /dev/null ++++ b/drivers/staging/echo/Kconfig +@@ -0,0 +1,9 @@ ++config ECHO ++ tristate "Line Echo Canceller support" ++ default n ++ ---help--- ++ This driver provides line echo cancelling support for mISDN and ++ Zaptel drivers. ++ ++ To compile this driver as a module, choose M here. The module ++ will be called echo. +--- /dev/null ++++ b/drivers/staging/echo/Makefile +@@ -0,0 +1 @@ ++obj-$(CONFIG_ECHO) += echo.o +--- /dev/null ++++ b/drivers/staging/echo/mmx.h +@@ -0,0 +1,288 @@ ++/* ++ * mmx.h ++ * Copyright (C) 1997-2001 H. Dietz and R. Fisher ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++#ifndef AVCODEC_I386MMX_H ++#define AVCODEC_I386MMX_H ++ ++/* ++ * The type of an value that fits in an MMX register (note that long ++ * long constant values MUST be suffixed by LL and unsigned long long ++ * values by ULL, lest they be truncated by the compiler) ++ */ ++ ++typedef union { ++ long long q; /* Quadword (64-bit) value */ ++ unsigned long long uq; /* Unsigned Quadword */ ++ int d[2]; /* 2 Doubleword (32-bit) values */ ++ unsigned int ud[2]; /* 2 Unsigned Doubleword */ ++ short w[4]; /* 4 Word (16-bit) values */ ++ unsigned short uw[4]; /* 4 Unsigned Word */ ++ char b[8]; /* 8 Byte (8-bit) values */ ++ unsigned char ub[8]; /* 8 Unsigned Byte */ ++ float s[2]; /* Single-precision (32-bit) value */ ++} mmx_t; /* On an 8-byte (64-bit) boundary */ ++ ++/* SSE registers */ ++typedef union { ++ char b[16]; ++} xmm_t; ++ ++ ++#define mmx_i2r(op,imm,reg) \ ++ __asm__ __volatile__ (#op " %0, %%" #reg \ ++ : /* nothing */ \ ++ : "i" (imm) ) ++ ++#define mmx_m2r(op,mem,reg) \ ++ __asm__ __volatile__ (#op " %0, %%" #reg \ ++ : /* nothing */ \ ++ : "m" (mem)) ++ ++#define mmx_r2m(op,reg,mem) \ ++ __asm__ __volatile__ (#op " %%" #reg ", %0" \ ++ : "=m" (mem) \ ++ : /* nothing */ ) ++ ++#define mmx_r2r(op,regs,regd) \ ++ __asm__ __volatile__ (#op " %" #regs ", %" #regd) ++ ++ ++#define emms() __asm__ __volatile__ ("emms") ++ ++#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) ++#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) ++#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) ++ ++#define movq_m2r(var,reg) mmx_m2r (movq, var, reg) ++#define movq_r2m(reg,var) mmx_r2m (movq, reg, var) ++#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) ++ ++#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) ++#define 
packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) ++#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) ++#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) ++ ++#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) ++#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) ++ ++#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) ++#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) ++#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) ++#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) ++#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) ++#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) ++ ++#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) ++#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) ++#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) ++#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) ++ ++#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) ++#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) ++#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) ++#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) ++ ++#define pand_m2r(var,reg) mmx_m2r (pand, var, reg) ++#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) ++ ++#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) ++#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) ++ ++#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) ++#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) ++#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) ++#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) ++#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) ++#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) ++ ++#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) ++#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) ++#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) ++#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) ++#define 
pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) ++#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) ++ ++#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) ++#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) ++ ++#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) ++#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) ++ ++#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) ++#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) ++ ++#define por_m2r(var,reg) mmx_m2r (por, var, reg) ++#define por_r2r(regs,regd) mmx_r2r (por, regs, regd) ++ ++#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) ++#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) ++#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) ++#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) ++#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) ++#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) ++#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) ++#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) ++#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) ++ ++#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) ++#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) ++#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) ++#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) ++#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) ++#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) ++ ++#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) ++#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) ++#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) ++#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) ++#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) ++#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) ++#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) ++#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) ++#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) ++ ++#define psubb_m2r(var,reg) mmx_m2r (psubb, 
var, reg) ++#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) ++#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) ++#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) ++#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) ++#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) ++ ++#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) ++#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) ++#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) ++#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) ++ ++#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) ++#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) ++#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) ++#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) ++ ++#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) ++#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) ++#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) ++#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) ++#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) ++#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) ++ ++#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) ++#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) ++#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) ++#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) ++#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) ++#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) ++ ++#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) ++#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) ++ ++ ++/* 3DNOW extensions */ ++ ++#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) ++#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) ++ ++ ++/* AMD MMX extensions - also available in intel SSE */ ++ ++ ++#define mmx_m2ri(op,mem,reg,imm) \ ++ __asm__ __volatile__ (#op " %1, %0, %%" #reg \ ++ : 
/* nothing */ \ ++ : "m" (mem), "i" (imm)) ++#define mmx_r2ri(op,regs,regd,imm) \ ++ __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ ++ : /* nothing */ \ ++ : "i" (imm) ) ++ ++#define mmx_fetch(mem,hint) \ ++ __asm__ __volatile__ ("prefetch" #hint " %0" \ ++ : /* nothing */ \ ++ : "m" (mem)) ++ ++ ++#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) ++ ++#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) ++ ++#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) ++#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) ++#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) ++#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) ++ ++#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) ++ ++#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) ++ ++#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) ++#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) ++ ++#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) ++#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) ++ ++#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) ++#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) ++ ++#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) ++#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) ++ ++#define pmovmskb(mmreg,reg) \ ++ __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) ++ ++#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) ++#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) ++ ++#define prefetcht0(mem) mmx_fetch (mem, t0) ++#define prefetcht1(mem) mmx_fetch (mem, t1) ++#define prefetcht2(mem) mmx_fetch (mem, t2) ++#define prefetchnta(mem) mmx_fetch (mem, nta) ++ ++#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) ++#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) ++ ++#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) ++#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) ++ ++#define sfence() __asm__ 
__volatile__ ("sfence\n\t") ++ ++/* SSE2 */ ++#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) ++#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm) ++#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm) ++#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm) ++ ++#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm) ++ ++#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg) ++#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var) ++#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd) ++#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg) ++#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var) ++#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd) ++ ++#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var) ++ ++#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg) ++#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg) ++ ++#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd) ++#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd) ++ ++ ++#endif /* AVCODEC_I386MMX_H */ +--- /dev/null ++++ b/drivers/staging/echo/TODO +@@ -0,0 +1,10 @@ ++TODO: ++ - checkpatch.pl cleanups ++ - Lindent ++ - typedef removals ++ - handle bit_operations.h (merge in or make part of common code?) ++ - remove proc interface, only use echo.h interface (proc interface is ++ racy and not correct.) 
++ ++Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc: Steve ++Underwood <steveu@coppice.org> and David Rowe <david@rowetel.com> +--- a/drivers/staging/Kconfig ++++ b/drivers/staging/Kconfig +@@ -39,4 +39,6 @@ source "drivers/staging/winbond/Kconfig" + + source "drivers/staging/wlan-ng/Kconfig" + ++source "drivers/staging/echo/Kconfig" ++ + endif # STAGING +--- a/drivers/staging/Makefile ++++ b/drivers/staging/Makefile +@@ -8,3 +8,4 @@ obj-$(CONFIG_VIDEO_GO7007) += go7007/ + obj-$(CONFIG_USB_IP_COMMON) += usbip/ + obj-$(CONFIG_W35UND) += winbond/ + obj-$(CONFIG_PRISM2_USB) += wlan-ng/ ++obj-$(CONFIG_ECHO) += echo/ diff --git a/staging/staging-slicoss-fix-warnings-due-to-static-usage.patch b/staging/staging-slicoss-fix-warnings-due-to-static-usage.patch new file mode 100644 index 00000000000000..69a41419c801bf --- /dev/null +++ b/staging/staging-slicoss-fix-warnings-due-to-static-usage.patch @@ -0,0 +1,83 @@ +From liodot@gmail.com Mon Oct 6 21:20:34 2008 +From: "Lior Dotan" <liodot@gmail.com> +Date: Sun, 5 Oct 2008 15:35:46 +0300 +Subject: Staging: SLICOSS: Fix warnings due to static usage +To: "Greg KH" <greg@kroah.com> +Message-ID: <a5bf86a80810050535i28686766reafe9723a840a76a@mail.gmail.com> +Content-Disposition: inline + + +Fix a few warning messages that crept in due to conversion of all the +functions to static + +Signed-off-by: Lior Dotan <liodot@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + + +--- + drivers/staging/slicoss/slicinc.h | 6 ------ + drivers/staging/slicoss/slicoss.c | 3 ++- + 2 files changed, 2 insertions(+), 7 deletions(-) + +--- a/drivers/staging/slicoss/slicinc.h ++++ b/drivers/staging/slicoss/slicinc.h +@@ -61,7 +61,6 @@ static void slic_xmit_fail(struct adapte + void *cmd, + u32 skbtype, + u32 status); +-static void slic_xmit_timeout(struct net_device *dev); + static void slic_config_pci(struct pci_dev *pcidev); + static struct sk_buff *slic_rcvqueue_getnext(struct adapter *adapter); + +@@ -90,8 +89,6 @@ 
static void slic_cmdq_free(struct adapt + static void slic_cmdq_reset(struct adapter *adapter); + static void slic_cmdq_addcmdpage(struct adapter *adapter, u32 *page); + static void slic_cmdq_getdone(struct adapter *adapter); +-static void slic_cmdq_putdone(struct adapter *adapter, +- struct slic_hostcmd *cmd); + static void slic_cmdq_putdone_irq(struct adapter *adapter, + struct slic_hostcmd *cmd); + static struct slic_hostcmd *slic_cmdq_getfree(struct adapter *adapter); +@@ -103,7 +100,6 @@ static void slic_rcvqueue_free(struct a + static void slic_rcv_handle_error(struct adapter *adapter, + struct slic_rcvbuf *rcvbuf); + static void slic_adapter_set_hwaddr(struct adapter *adapter); +-static void slic_card_halt(struct sliccard *card, struct adapter *adapter); + static int slic_card_init(struct sliccard *card, struct adapter *adapter); + static void slic_intagg_set(struct adapter *adapter, u32 value); + static int slic_card_download(struct adapter *adapter); +@@ -120,7 +116,6 @@ static void slic_unmap_mmio_space(struct + static void slic_card_cleanup(struct sliccard *card); + static void slic_init_cleanup(struct adapter *adapter); + static void slic_soft_reset(struct adapter *adapter); +-static void slic_card_reset(struct adapter *adapter); + static bool slic_mac_filter(struct adapter *adapter, + struct ether_header *ether_frame); + static void slic_mac_address_config(struct adapter *adapter); +@@ -133,7 +128,6 @@ static void slic_config_set(struct adapt + static void slic_config_clear(struct adapter *adapter); + static void slic_config_get(struct adapter *adapter, u32 config, + u32 configh); +-static void slic_timer_get_stats(ulong device); + static void slic_timer_load_check(ulong context); + static void slic_timer_ping(ulong dev); + static void slic_assert_fail(void); +--- a/drivers/staging/slicoss/slicoss.c ++++ b/drivers/staging/slicoss/slicoss.c +@@ -3128,6 +3128,7 @@ static int slic_mac_set_address(struct n + * 50 seconds or whatever STATS_TIMER_INTERVAL is 
set to. + * + */ ++#if SLIC_GET_STATS_TIMER_ENABLED + static void slic_timer_get_stats(ulong dev) + { + struct adapter *adapter; +@@ -3163,7 +3164,7 @@ static void slic_timer_get_stats(ulong d + SLIC_SECS_TO_JIFFS(STATS_TIMER_INTERVAL); + add_timer(&adapter->statstimer); + } +- ++#endif + static void slic_timer_load_check(ulong cardaddr) + { + struct sliccard *card = (struct sliccard *)cardaddr; diff --git a/usb/usb-ehci-fix-remote-wakeup-support-for-arc-tdi-core.patch b/usb/usb-ehci-fix-remote-wakeup-support-for-arc-tdi-core.patch new file mode 100644 index 00000000000000..37ece8adac3b5d --- /dev/null +++ b/usb/usb-ehci-fix-remote-wakeup-support-for-arc-tdi-core.patch @@ -0,0 +1,151 @@ +From stern+48fbdcb8@rowland.harvard.edu Mon Oct 6 21:11:37 2008 +From: Alan Stern <stern@rowland.harvard.edu> +Date: Mon, 6 Oct 2008 11:25:53 -0400 (EDT) +Subject: USB: EHCI: fix remote-wakeup support for ARC/TDI core +To: Greg KH <greg@kroah.com> +Cc: David Brownell <david-b@pacbell.net> +Message-ID: <Pine.LNX.4.44L0.0810061124270.2180-100000@iolanthe.rowland.org> + + +This patch (as1147) fixes the remote-wakeup support for EHCI +controllers using the ARC/TDI "embedded-TT" core. These controllers +turn off the RESUME bit by themselves when a port resume is complete; +hence we need to keep separate track of which ports are suspended or +in the process of resuming. + +The patch also makes a couple of small improvements in ehci_irq(), +replacing reads of the command register with the value already stored +in a local variable. 
+ +Signed-off-by: Alan Stern <stern@rowland.harvard.edu> +Tested-by: Thomas Reitmayr <treitmayr@devbase.at> +CC: David Brownell <david-b@pacbell.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/usb/host/ehci-hcd.c | 14 ++++++++------ + drivers/usb/host/ehci-hub.c | 27 +++++++++++++++++++-------- + drivers/usb/host/ehci.h | 2 ++ + 3 files changed, 29 insertions(+), 14 deletions(-) + +--- a/drivers/usb/host/ehci.h ++++ b/drivers/usb/host/ehci.h +@@ -99,6 +99,8 @@ struct ehci_hcd { /* one per controlle + owned by the companion during a bus suspend */ + unsigned long port_c_suspend; /* which ports have + the change-suspend feature turned on */ ++ unsigned long suspended_ports; /* which ports are ++ suspended */ + + /* per-HC memory pools (could be per-bus, but ...) */ + struct dma_pool *qh_pool; /* qh per active urb */ +--- a/drivers/usb/host/ehci-hcd.c ++++ b/drivers/usb/host/ehci-hcd.c +@@ -705,7 +705,7 @@ static irqreturn_t ehci_irq (struct usb_ + pcd_status = status; + + /* resume root hub? 
*/ +- if (!(ehci_readl(ehci, &ehci->regs->command) & CMD_RUN)) ++ if (!(cmd & CMD_RUN)) + usb_hcd_resume_root_hub(hcd); + + while (i--) { +@@ -714,8 +714,11 @@ static irqreturn_t ehci_irq (struct usb_ + + if (pstatus & PORT_OWNER) + continue; +- if (!(pstatus & PORT_RESUME) +- || ehci->reset_done [i] != 0) ++ if (!(test_bit(i, &ehci->suspended_ports) && ++ ((pstatus & PORT_RESUME) || ++ !(pstatus & PORT_SUSPEND)) && ++ (pstatus & PORT_PE) && ++ ehci->reset_done[i] == 0)) + continue; + + /* start 20 msec resume signaling from this port, +@@ -730,9 +733,8 @@ static irqreturn_t ehci_irq (struct usb_ + + /* PCI errors [4.15.2.4] */ + if (unlikely ((status & STS_FATAL) != 0)) { +- dbg_cmd (ehci, "fatal", ehci_readl(ehci, +- &ehci->regs->command)); +- dbg_status (ehci, "fatal", status); ++ dbg_cmd(ehci, "fatal", cmd); ++ dbg_status(ehci, "fatal", status); + if (status & STS_HALT) { + ehci_err (ehci, "fatal error\n"); + dead: +--- a/drivers/usb/host/ehci-hub.c ++++ b/drivers/usb/host/ehci-hub.c +@@ -236,10 +236,8 @@ static int ehci_bus_resume (struct usb_h + temp = ehci_readl(ehci, &ehci->regs->port_status [i]); + temp &= ~(PORT_RWC_BITS | PORT_WAKE_BITS); + if (test_bit(i, &ehci->bus_suspended) && +- (temp & PORT_SUSPEND)) { +- ehci->reset_done [i] = jiffies + msecs_to_jiffies (20); ++ (temp & PORT_SUSPEND)) + temp |= PORT_RESUME; +- } + ehci_writel(ehci, temp, &ehci->regs->port_status [i]); + } + i = HCS_N_PORTS (ehci->hcs_params); +@@ -482,10 +480,9 @@ ehci_hub_status_data (struct usb_hcd *hc + * controller by the user. + */ + +- if ((temp & mask) != 0 +- || ((temp & PORT_RESUME) != 0 +- && time_after_eq(jiffies, +- ehci->reset_done[i]))) { ++ if ((temp & mask) != 0 || test_bit(i, &ehci->port_c_suspend) ++ || (ehci->reset_done[i] && time_after_eq( ++ jiffies, ehci->reset_done[i]))) { + if (i < 7) + buf [0] |= 1 << (i + 1); + else +@@ -688,6 +685,7 @@ static int ehci_hub_control ( + /* resume completed? 
*/ + else if (time_after_eq(jiffies, + ehci->reset_done[wIndex])) { ++ clear_bit(wIndex, &ehci->suspended_ports); + set_bit(wIndex, &ehci->port_c_suspend); + ehci->reset_done[wIndex] = 0; + +@@ -734,6 +732,9 @@ static int ehci_hub_control ( + ehci_readl(ehci, status_reg)); + } + ++ if (!(temp & (PORT_RESUME|PORT_RESET))) ++ ehci->reset_done[wIndex] = 0; ++ + /* transfer dedicated ports to the companion hc */ + if ((temp & PORT_CONNECT) && + test_bit(wIndex, &ehci->companion_ports)) { +@@ -757,8 +758,17 @@ static int ehci_hub_control ( + } + if (temp & PORT_PE) + status |= 1 << USB_PORT_FEAT_ENABLE; +- if (temp & (PORT_SUSPEND|PORT_RESUME)) ++ ++ /* maybe the port was unsuspended without our knowledge */ ++ if (temp & (PORT_SUSPEND|PORT_RESUME)) { + status |= 1 << USB_PORT_FEAT_SUSPEND; ++ } else if (test_bit(wIndex, &ehci->suspended_ports)) { ++ clear_bit(wIndex, &ehci->suspended_ports); ++ ehci->reset_done[wIndex] = 0; ++ if (temp & PORT_PE) ++ set_bit(wIndex, &ehci->port_c_suspend); ++ } ++ + if (temp & PORT_OC) + status |= 1 << USB_PORT_FEAT_OVER_CURRENT; + if (temp & PORT_RESET) +@@ -803,6 +813,7 @@ static int ehci_hub_control ( + || (temp & PORT_RESET) != 0) + goto error; + ehci_writel(ehci, temp | PORT_SUSPEND, status_reg); ++ set_bit(wIndex, &ehci->suspended_ports); + break; + case USB_PORT_FEAT_POWER: + if (HCS_PPC (ehci->hcs_params)) diff --git a/usb/usb-snoop-processes-opening-usbfs-device-files.patch b/usb/usb-snoop-processes-opening-usbfs-device-files.patch new file mode 100644 index 00000000000000..89271209586a09 --- /dev/null +++ b/usb/usb-snoop-processes-opening-usbfs-device-files.patch @@ -0,0 +1,31 @@ +From linux-usb-owner@vger.kernel.org Mon Oct 6 21:11:07 2008 +From: Alan Stern <stern@rowland.harvard.edu> +Date: Mon, 6 Oct 2008 11:24:26 -0400 (EDT) +Subject: USB: snoop processes opening usbfs device files +To: Greg KH <greg@kroah.com> +Message-ID: <Pine.LNX.4.44L0.0810061123070.2180-100000@iolanthe.rowland.org> + + +This patch (as1148) adds a new 
"snoop" message to usbfs when a device +file is opened, identifying the process responsible. This comes in +extremely handy when trying to determine which program is doing some +unwanted USB access. + +Signed-off-by: Alan Stern <stern@rowland.harvard.edu> +Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> + +--- + drivers/usb/core/devio.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/usb/core/devio.c ++++ b/drivers/usb/core/devio.c +@@ -624,6 +624,8 @@ static int usbdev_open(struct inode *ino + smp_wmb(); + list_add_tail(&ps->list, &dev->filelist); + file->private_data = ps; ++ snoop(&dev->dev, "opened by process %d: %s\n", task_pid_nr(current), ++ current->comm); + out: + if (ret) { + kfree(ps); |
