In another project I needed a self-contained shell script to extract a bunch of images from a tar archive, but I didn't want the extra file, so I included a hex dump of the tar file in the script and decoded it at run time. Hex, however, is wasteful; it's a 2:1 ratio. I looked at base64 MIME encoding, but that required utilities that may or may not exist on all target systems. I didn't want my shell script to try to compile its own decoder because there was no guarantee that all targets had a C compiler. Doing it in pure Bourne shell was possible but very, very slow, so I looked at awk. The problem with awk is that it lacks the ability to read pure binary data to do the encoding. Solution: have awk use od (octal dump) to convert the input from binary to decimal values. This worked great; I could then include a shell function that calls awk, pipe "here-document" data through the function, and then pipe the output into tar. I then shortened the script as much as possible to reduce its size, so it's almost unreadable, I'm afraid. After a while, I discovered an article about ASCII85 on Wikipedia and wrote an awk implementation of it using the same tricks I had used for base64. You can find the scripts in the attachments, but they're pasted here for your perusal pleasure, or masochism. Both of these scripts are less than one kilobyte each. Note that if you plan on running these on Solaris, use the /usr/xpg4/bin versions of awk and od, as the other ones are quite brain-dead. 
b64:
#!/usr/bin/awk -f
# b64 -- base64 encoder/decoder written in plain awk.
#
# Usage:  b64   < input  > output    encode stdin to base64 text
#         b64 d < input  > output    decode base64 text from stdin
#
# awk cannot read raw bytes itself, so the encoder reads stdin through
# od(1), which turns every input byte into two hex digits.
#
# NOTE(review): some awk implementations (gawk in a multibyte locale, for
# one) may encode `printf "%c"` values above 127 as multibyte characters;
# run the decoder under LC_ALL=C if the output must be binary-exact.

# Print one output character and break the line after every 76 characters.
# rc is a global holding the number of characters on the current line.
function emit64(ch) {
    printf "%s", ch
    if (++rc > 75) {
        printf "\n"
        rc = 0
    }
}

# Encode stdin as base64 on stdout.
# Names after the gap in the parameter list are locals, not arguments.
function encode64(    cmd, pos, nib, k, acc, nbits) {
    # "-A n" drops the offset column, so every hex digit on a line is data
    # and no assumption about the width of that column is needed.
    cmd = "od -A n -v -t x1"
    # getline returns -1 on error; testing "> 0" keeps a failed read from
    # turning into an endless loop.
    while ((cmd | getline) > 0) {
        for (pos = 1; pos <= length($0); pos++) {
            nib = index("0123456789abcdef", substr($0, pos, 1))
            if (nib == 0)
                continue            # blank between bytes
            nib--                   # nibble value 0..15
            # Shift the nibble into the accumulator one bit at a time,
            # most significant bit first; flush every 6 bits.
            for (k = 1; k <= 4; k++) {
                acc = acc * 2 + int(nib / 8)
                nib = (nib * 2) % 16
                if (++nbits == 6) {
                    emit64(substr(b64, acc + 1, 1))
                    nbits = 0
                    acc = 0
                }
            }
        }
    }
    close(cmd)

    # Whole bytes leave 0, 2 or 4 pending bits.  2 pending bits mean one
    # trailing input byte (pad "=="), 4 mean two trailing bytes (pad "=");
    # input that is a multiple of three bytes gets no padding at all.
    if (nbits) {
        k = nbits
        while (nbits++ < 6)
            acc = acc * 2           # zero-fill the last 6-bit group
        emit64(substr(b64, acc + 1, 1))
        emit64("=")
        if (k == 2)
            emit64("=")
    }
    if (rc > 0)
        printf "\n"                 # terminate a partly filled last line
}

# Decode base64 text from stdin to raw bytes on stdout.  Characters outside
# the alphabet (line breaks, "=" padding, blanks) are skipped, and trailing
# bits that do not complete a byte are dropped.
function decode64(    pos, val, k, acc, nbits) {
    # Plain getline inside BEGIN would try to open the operand "d" as an
    # input file, hence the explicit /dev/stdin.
    while ((getline < "/dev/stdin") > 0) {
        for (pos = 1; pos <= length($0); pos++) {
            val = index(b64, substr($0, pos, 1))
            if (val == 0)
                continue
            val--                   # 6-bit value 0..63
            for (k = 0; k < 6; k++) {
                acc = acc * 2 + int(val / 32)
                val = (val * 2) % 64
                if (++nbits == 8) {
                    printf "%c", acc
                    nbits = 0
                    acc = 0
                }
            }
        }
    }
}

BEGIN {
    b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
    if (ARGV[1] == "d")
        decode64()
    else
        encode64()
    exit
}
b85:
#!/usr/bin/awk -f
# b85 -- ASCII85 encoder/decoder written in plain awk.
#
# Usage:  b85   < input  > output    encode stdin, framed by "<~" and "~>"
#         b85 d < input  > output    decode the first "<~" ... "~>" section
#
# Four input bytes form one 32-bit group that is written as five base-85
# digits using the characters "!" (0) through "u" (84).  A full group of
# four zero bytes is abbreviated to the single character "z".
#
# NOTE(review): the locale caveat for `printf "%c"` described for b64
# applies to the decoder here as well; prefer LC_ALL=C for binary output.

# Count one output column (global col) and start a new line once more than
# 72 columns have been used.
function wrap85() {
    if (++col > 72) {
        col = 0
        print ""
    }
}

# Write the group value v, which holds nbytes (1..4) input bytes in its
# most significant positions, as nbytes + 1 digits.
function put85(v, nbytes,    digs, n) {
    # "z" stands for four zero bytes, so it may only replace a full group;
    # a short final group of zero bytes must be written as "!" digits.
    if (v == 0 && nbytes > 3) {
        printf "z"
        wrap85()
        return
    }
    # Collect the five digits least significant first ...
    digs = ""
    for (n = 0; n < 5; n++) {
        digs = digs substr(A85, (v % 85) + 1, 1)
        v = int(v / 85)             # keep v integral for the next digit
    }
    # ... and print the leading nbytes + 1 of them, most significant first.
    while (n > 4 - nbytes) {
        printf "%s", substr(digs, n--, 1)
        wrap85()
    }
}

# Encode stdin as ASCII85 on stdout.
function encode85(    cmd, f, acc, cnt, weight) {
    printf "<~"
    col = 2                         # the two marker characters just printed
    cmd = "od -v -t u1"
    weight = 16777216               # 256^3: place value of a group's first byte
    while ((cmd | getline) > 0) {
        # Field 1 is od's offset; the remaining fields are decimal bytes.
        for (f = 2; f <= NF; f++) {
            acc += $f * weight
            weight /= 256
            if (++cnt > 3) {
                put85(acc, cnt)
                acc = cnt = 0
                weight = 16777216
            }
        }
    }
    close(cmd)
    if (cnt)
        put85(acc, cnt)             # short final group
    # Reserve two columns so the end marker is not pushed past the wrap
    # limit of the current line.
    col++
    wrap85()
    print "~>"
}

# Decode ASCII85 from stdin to raw bytes on stdout.  Lines before the one
# that starts with "<~" are ignored; decoding stops at "~>" or end of input.
function decode85(    active, pos, n, acc, cnt) {
    while (!active && (getline < "/dev/stdin") > 0)
        if (substr($0, 1, 2) == "<~")
            active = 1

    pos = 2                         # skip the "<~" on the first line
    while (active) {
        while (++pos <= length($0) && active) {
            n = index(A85, substr($0, pos, 1))
            if (substr($0, pos, 2) == "~>")
                active = 0
            if (n > 85) {
                # Position 86 of the alphabet is "z": four zero bytes.
                printf "%c%c%c%c", 0, 0, 0, 0
            } else if (n--) {
                # n is now the digit value 0..84; other characters had
                # index 0 and are skipped.
                acc = 85 * acc + n
                if (++cnt > 4) {
                    # Five digits collected: write the four bytes, most
                    # significant first.  The loop leaves cnt at 0.
                    while (--cnt) {
                        printf "%c", int(acc / 16777216) % 256
                        acc *= 256
                    }
                    acc = 0
                }
            }
        }
        pos = 0
        # Stop on end of input and on read errors alike.
        if ((getline < "/dev/stdin") <= 0)
            active = 0
    }

    # A short final group of cnt digits carries cnt - 1 bytes.  Adding one
    # before scaling makes up for the digits the encoder left out.
    if (cnt) {
        acc = (acc + 1) * 85 ^ (5 - cnt)
        for (cnt--; cnt > 0; cnt--) {
            printf "%c", int(acc / 16777216) % 256
            acc = 256 * (acc - 16777216 * int(acc / 16777216))
        }
    }
}

BEGIN {
    # Alphabet: "!" .. "u" at positions 1..85, then "z" at position 86.
    A85 = "z"
    for (i = 85; i > 0; i--)
        A85 = sprintf("%c", 32 + i) A85
    if (ARGV[1] == "d")
        decode85()
    else
        encode85()
    exit
}
Example use: B$ t="This is the input text used in the encoding comparisons that are to follow. I'm making this long enough so that overhead will not be too significant in the results. 
ASCII85 encoding adds an extra two bytes at the beginning ( <~ ) and 2 at the end ( ~> ) so it has a little disadvantage over base64 mime encoding, but that is quickly nullified with any but the smallest input texts."VGhpcyBpcyB0aGUgaW5wdXQgdGV4dCB1c2VkIGluIHRoZSBlbmNvZGluZyBjb21wYXJpc29ucyB0aGF0IGFyZSB0byBmb2xsb3cuICBJJ20gbWFraW5nIHRoaXMgbG9uZyBlbm91Z2ggc28gdGhhdCBvdmVyaGVhZCB3aWxsIG5vdCBiZSB0b28gc2lnbmlmaWNhbnQgaW4gICB0aGUgcmVzdWx0cy4gIEFTQ0lJODUgZW5jb2RpbmcgYWRkcyBhbiBleHRyYSB0d28gYnl0ZXMgYXQgdGhlIGJlZ2lubmluZyAoIDx+ICkgYW5kIDIgYXQgdGhlIGVuZCAoIH4+ICkgc28gaXQgaGFzIGEgbGl0dGxlIGRpc2FkdmFudGFnZSBvdmVyIGJhc2U2NCBtaW1lIGVuY29kaW5nLCBidXQgdGhhdCBpcyBxdWlja2x5IG51bGxpZmllZCB3aXRoIGFueSBidXQgdGhlIHNtYWxsZXN0IGlucHV0IHRleHRzLgo==<~<+oue+DGm>FD,5.Bl7m4F<G[:G]Y'NF(Jl)Bl5&8BOr;tDI[TqBl7Q+@rH4'@<-('Df0V=FD,*)+CT;%+EVNEAoDL%Dg*fV+A!qt+DkP&Bl7Q+FD,B0+Dbt6B-:c'Dfo]++EMHDFD,*)+E)F7EbK#mA0?)1Cht53Dfd+2AKZ)5D]j+8B5VEqBk(RhF<G:8+<VeKBOr<,ATN!1FE9&W+@/pn8P(m!+D#G#De*R"B-:VnA9/l%DBNM8FE1e4FE_XG@X3',F!+n5+EV:.+C\npBl7g&DJ((?+?Y)q.3N&:A0<WM@<<W6BOr;tDIak<+FZKs.3N\M+DGp?BOPs)@3BB#FED>1+Co2-@:XOiDKK<"AKYo7ATAo&@<6!<1a$XLD.Oi$DI[TqBl7Q7+C]J8+EV:*F<G:=+E;O<@r#n++Du=<Ch[KqARlp-Bln#2@;^?5@Wcc8FD,5.F)>?%Ch7[0+DG_4F`\aJAU&<</d_~>This is the input text used in the encoding comparisons that are to follow. I'm making this long enough so that overhead will not be too significant in the results. ASCII85 encoding adds an extra two bytes at the beginning ( <~ ) and 2 at the end ( ~> ) so it has a little disadvantage over base64 mime encoding, but that is quickly nullified with any but the smallest input texts. |


