「言語処理10本ノック 2020」をC++で - プログラミング素人のはてなブログ

U++さんのところで見つけたので挑戦してみました。(最初の10本だけ)
C++の文字列処理が苦手なのでやってみました。AtCoderなどでも文字列の処理は重要なので、ちょうどよい題材です。

01、02は日本語が含まれていますが、C++の日本語は闇が深いので、問題の意図を変更しない範囲でアルファベットや数字に置き換えました。
また、#includeは省略しますが、以下のようなものを使っています。

#include <iostream>
#include <algorithm>
#include <vector>
#include <numeric>
#include <map>
#include <random>

using namespace std;

やってみた

00. 文字列の逆順
文字列”stressed”の文字を逆に（末尾から先頭に向かって）並べた文字列を得よ．

// Prints the characters of "stressed" from last to first, with no trailing newline.
int main()
{
  const std::string word = "stressed";
  // Reverse iterators visit the string back to front without index arithmetic.
  for (auto it = word.rbegin(); it != word.rend(); ++it)
  {
    std::cout << *it;
  }
}

01. 「パタトクカシーー」
「パタトクカシーー」という文字列の1,3,5,7文字目を取り出して連結した文字列を得よ．

// Prints the characters at indices 0, 2, 4, ... (the 1st, 3rd, 5th, 7th
// characters) of the sample string, with no trailing newline.
int main()
{
  const std::string text = "abababab";
  std::string::size_type pos = 0;
  while (pos < text.size())
  {
    std::cout << text[pos];
    pos += 2; // skip the character in between
  }
}

02. 「パトカー」＋「タクシー」＝「パタトクカシーー」
「パトカー」＋「タクシー」の文字を先頭から交互に連結して文字列「パタトクカシーー」を得よ．

// Interleaves the two sample strings character by character
// (a[0] b[0] a[1] b[1] ...) and prints the result without a trailing newline.
int main()
{
  const std::string first = "abcd";
  const std::string second = "0123";

  // Both literals have the same length, so indexing `second` with an index
  // bounded by `first` stays in range.
  std::string merged;
  for (std::string::size_type k = 0; k != first.size(); ++k)
  {
    merged += first[k];
    merged += second[k];
  }
  std::cout << merged;
}

03. 円周率
“Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics.”という文を単語に分解し，各単語の（アルファベットの）文字数を先頭から出現順に並べたリストを作成せよ．

// Prints the number of letters of each word in the sentence, back to back
// with no separator between the counts.
int main()
{
  const std::string sentence = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics.";

  int letters = 0;
  for (const char ch : sentence)
  {
    const bool isDelimiter = (ch == ' ' || ch == ',' || ch == '.');
    if (!isDelimiter)
    {
      ++letters;
      continue;
    }
    // A delimiter closes the current word. Back-to-back delimiters (", ")
    // find the counter at zero and therefore print nothing.
    if (letters != 0)
      std::cout << letters;
    letters = 0;
  }
}

04. 元素記号
“Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.”という文を単語に分解し，1, 5, 6, 7, 8, 9, 15, 16, 19番目の単語は先頭の1文字，それ以外の単語は先頭に2文字を取り出し，取り出した文字列から単語の位置（先頭から何番目の単語か）への連想配列（辞書型もしくはマップ型）を作成せよ．

// 1-based position of the word currently being examined; read by isSingle().
int count_;
// Word positions for which only the first letter is taken.
int single_[] = {1, 5, 6, 7, 8, 9, 15, 16, 19};

// Isn't the element symbol for magnesium Mg?

// Returns true when count_ equals one of the positions listed in single_.
bool isSingle()
{
  for (const int position : single_)
  {
    if (position == count_)
      return true;
  }
  return false;
}

// Splits the sentence on single spaces, takes the first one or two letters of
// each word (one letter when isSingle() says so for the current position),
// prints each extracted symbol on its own line and records symbol -> word
// position in a map.
int main()
{
  std::string sentence = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.";
  std::map<std::string, int> symbolToPosition;
  count_ = 0;

  int pos = 0;
  while (pos < sentence.size())
  {
    // `pos` is at the first character of a word here.
    ++count_;
    std::string symbol(1, sentence[pos]);
    if (!isSingle()) // not a one-letter position: take the second letter too
    {
      symbol += sentence[pos + 1];
    }

    symbolToPosition[symbol] = count_;
    std::cout << symbol << std::endl;

    // Advance to the space that ends this word (or to the end of the text),
    // then step over it so the next iteration starts on the next word.
    while (pos < sentence.size() && sentence[pos] != ' ')
    {
      ++pos;
    }
    ++pos;
  }
}

05. n-gram
与えられたシーケンス（文字列やリストなど）からn-gramを作る関数を作成せよ．この関数を用い，”I am an NLPer”という文から単語bi-gram，文字bi-gramを得よ．

// What is an n-gram?
// https://qiita.com/kazmaw/items/4df328cba6429ec210fb

// Returns every run of n consecutive characters of `text`, in order of
// appearance. The result is empty when n is 0 or `text` has fewer than n
// characters.
static std::vector<std::string> charNgrams(const std::string &text, std::string::size_type n)
{
  std::vector<std::string> grams;
  if (n == 0)
    return grams;
  // `start + n <= size()` avoids the unsigned wrap-around that `size() - n`
  // would cause on short input.
  for (std::string::size_type start = 0; start + n <= text.size(); ++start)
  {
    grams.push_back(text.substr(start, n));
  }
  return grams;
}

// Prints the character bi-grams of "I am an NLPer", one per line.
// Always returns 0.
int moji()
{
  const std::string a = "I am an NLPer";
  for (const std::string &gram : charNgrams(a, 2))
  {
    std::cout << gram << std::endl;
  }
  return 0;
}

// Returns every run of n consecutive entries of `words`, joined by single
// spaces, in order of appearance. The result is empty when n is 0 or there
// are fewer than n words.
static std::vector<std::string> wordNgrams(const std::vector<std::string> &words, std::vector<std::string>::size_type n)
{
  typedef std::vector<std::string>::size_type Index;

  std::vector<std::string> grams;
  if (n == 0)
    return grams;
  for (Index start = 0; start + n <= words.size(); ++start)
  {
    std::string gram = words[start];
    for (Index offset = 1; offset < n; ++offset)
    {
      gram += " ";
      gram += words[start + offset];
    }
    grams.push_back(gram);
  }
  return grams;
}

// Prints the word bi-grams of "I am an NLPer", one per line.
// Always returns 0.
int tango()
{
  const std::string a = "I am an NLPer";

  // Split on single spaces; the text after the last space is the final word.
  std::vector<std::string> words;
  std::string current;
  for (const char ch : a)
  {
    if (ch != ' ')
    {
      current += ch;
    }
    else
    {
      words.push_back(current);
      current.clear();
    }
  }
  words.push_back(current);

  for (const std::string &gram : wordNgrams(words, 2))
  {
    std::cout << gram << std::endl;
  }
  return 0;
}

// Runs both bi-gram demos in sequence.
int main()
{
  moji();  // character bi-grams
  tango(); // word bi-grams
  return 0;
}

06. 集合
“paraparaparadise”と”paragraph”に含まれる文字bi-gramの集合を，それぞれ, XとYとして求め，XとYの和集合，積集合，差集合を求めよ．さらに，’se’というbi-gramがXおよびYに含まれるかどうかを調べよ．

// Builds the sets of distinct character bi-grams X (from "paraparaparadise")
// and Y (from "paragraph"), prints their union, intersection and difference
// (X minus Y), and finally prints whether "se" occurs in both X and Y.
int main()
{
  const std::string a = "paraparaparadise";
  const std::string b = "paragraph";

  // True when `gram` is an element of `grams`.
  const auto contains = [](const std::vector<std::string> &grams, const std::string &gram) -> bool {
    return std::find(grams.begin(), grams.end(), gram) != grams.end();
  };

  // Distinct character bi-grams of `text`, in order of first appearance.
  const auto uniqueBigrams = [&contains](const std::string &text) -> std::vector<std::string> {
    std::vector<std::string> grams;
    // `i + 1 < size()` instead of `i < size() - 1`: no unsigned wrap-around
    // when the text is empty.
    for (std::string::size_type i = 0; i + 1 < text.size(); ++i)
    {
      const std::string gram = text.substr(i, 2);
      if (!contains(grams, gram)) // the text itself repeats bi-grams; keep each once
        grams.push_back(gram);
    }
    return grams;
  };

  // Prints `title` on its own line followed by one bi-gram per line.
  const auto printSet = [](const char *title, const std::vector<std::string> &grams) {
    std::cout << title << std::endl;
    for (const std::string &gram : grams)
    {
      std::cout << gram << std::endl;
    }
  };

  const std::vector<std::string> x = uniqueBigrams(a);
  const std::vector<std::string> y = uniqueBigrams(b);

  // Union: all of X, followed by the members of Y that X lacks.
  std::vector<std::string> wa = x;
  for (const std::string &gram : y)
  {
    if (!contains(x, gram))
      wa.push_back(gram);
  }
  printSet("***wa***", wa);

  // Intersection: members of Y that also belong to X (in Y's order).
  std::vector<std::string> seki;
  for (const std::string &gram : y)
  {
    if (contains(x, gram))
      seki.push_back(gram);
  }
  printSet("***seki***", seki);

  // Difference: members of X that do not belong to Y.
  std::vector<std::string> sa;
  for (const std::string &gram : x)
  {
    if (!contains(y, gram))
      sa.push_back(gram);
  }
  printSet("***sa***", sa);

  // NOTE(review): this reports a single answer, true only when "se" is in
  // both sets; the exercise may intend separate answers for X and Y.
  std::cout << "***\"se\" in X and Y***" << std::endl;
  if (contains(x, "se") && contains(y, "se"))
    std::cout << "True" << std::endl;
  else
    std::cout << "False" << std::endl;
}

07. テンプレートによる文生成
引数x, y, zを受け取り「x時のyはz」という文字列を返す関数を実装せよ．さらに，x=12, y=”気温”, z=22.4として，実行結果を確認せよ．

// Reads three whitespace-separated tokens x, y and z from standard input and
// prints "<x> ji no <y> ha <z>" followed by a newline.
int main()
{
  std::string x;
  std::string y;
  std::string z;
  std::cin >> x;
  std::cin >> y;
  std::cin >> z;

  const std::string sentence = x + " ji no " + y + " ha " + z;
  std::cout << sentence << std::endl;
}

08. 暗号文
与えられた文字列の各文字を，以下の仕様で変換する関数cipherを実装せよ．
英小文字ならば(219 - 文字コード)の文字に置換
その他の文字はそのまま出力
この関数を用い，英語のメッセージを暗号化・復号化せよ

//わかりません。

09. Typoglycemia
スペースで区切られた単語列に対して，各単語の先頭と末尾の文字は残し，それ以外の文字の順序をランダムに並び替えるプログラムを作成せよ．ただし，長さが４以下の単語は並び替えないこととする．適当な英語の文（例えば”I couldn’t believe that I could actually understand what I was reading : the phenomenal power of the human mind .”）を与え，その実行結果を確認せよ．

// Typoglycemia: splits the sentence on single spaces and, for every word
// longer than four characters, keeps its first and last characters in place
// while randomly shuffling the characters in between. Prints the words
// separated (and followed) by a single space.
int main()
{
  const std::string x = "I couldn\'t believe that I could actually understand what I was reading : the phenomenal power of the human mind .";

  // Split on single spaces; the text after the last space is the final word.
  std::vector<std::string> v;
  std::string tmp;
  for (const char ch : x)
  {
    if (ch != ' ')
    {
      tmp += ch;
    }
    else
    {
      v.push_back(tmp);
      tmp.clear();
    }
  }
  v.push_back(tmp);

  // Create and seed the engine once for the whole run instead of building a
  // new random_device and engine for every word.
  std::random_device seed_gen;
  std::mt19937 engine(seed_gen());

  for (std::string &word : v)
  {
    if (word.size() > 4)
    {
      // Shuffle the interior only: [begin + 1, end - 1).
      std::shuffle(word.begin() + 1, word.end() - 1, engine);
    }
  }

  for (const std::string &word : v)
  {
    std::cout << word << " ";
  }
}