Loading

Bash Shell Scripts and Commands

  1. Bash Shell Scripts and Commands
  2. ===============================
  3.  
  4. #pretty print a JSON file to stdout
  5.  
  6.     cat file.json | python -m json.tool
  7.  
  8.  
  9. #cartesian product of two files
  10.  
  11.     while read a; do while read b; do echo "$a;$b"; done < a_democratics.txt; done < b_republicans.txt
  12.  
  13.  
  14. #extract anchor links from wiki_text
  15.  
  16.     egrep -o '\[\[([A-Za-z]+\s?)+\s?(\(([A-Za-z])+\))?\|([A-Za-z]+\s?)+\]\]' wiki_text_mediawiki.txt
  17.  
  18.  
  19. #rename the file inside each directory to the directory's name
  20.  
  21.     for i in */; do cd $i; mv * `echo ${i::-1}`; cd ../; done
  22.  
  23.  
  24. #add '.old' to directory names
  25.  
  26.     for i in */; do mv $i `echo ${i::-1}.old`; done
  27.  
  28.  
  29. #move files out of each directory into the parent directory
  30.  
  31.     for i in */; do mv $i/* .; done
  32.  
  33.  
  34. #clean tags from a file
  35.    
  36.     sed 's/<[^>]*>//g' file.xml
  37.  
  38.  
  39. #remove lines from a file containing two consecutive occurrences of tab characters
  40.  
  41.     sed '/\t\t/d' file.txt
  42.  
  43.  
  44. #remove all URLs from file
  45.  
  46.     sed -e 's!http[s]\?://\S*!!g' big_corpus_blogs.txt > big_corpus_blogs_no_urls.txt
  47.  
  48.  
  49. #find overlapping named-entities
  50.  
  51.     grep -oP '<[A-Z]+>[^<]+<[A-Z]+>[^<]+</[A-Z]+>[^<]+</[A-Z]+>' nyt_eng_199411.txt.tagged
  52.  
  53.  
  54. #add tag to a string in a file
  55.  
  56.     sed 's/ Facebook / <ORG>Facebook<\/ORG> /g' sentences_matched_freebase.txt > sentences_matched_freebase2.txt

Comments